2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
38 __device__ __constant__ u32 c_tables[4][256] =
41 0x00072000, 0x00075000, 0x00074800, 0x00071000,
42 0x00076800, 0x00074000, 0x00070000, 0x00077000,
43 0x00073000, 0x00075800, 0x00070800, 0x00076000,
44 0x00073800, 0x00077800, 0x00072800, 0x00071800,
45 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
46 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
47 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
48 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
49 0x00022000, 0x00025000, 0x00024800, 0x00021000,
50 0x00026800, 0x00024000, 0x00020000, 0x00027000,
51 0x00023000, 0x00025800, 0x00020800, 0x00026000,
52 0x00023800, 0x00027800, 0x00022800, 0x00021800,
53 0x00062000, 0x00065000, 0x00064800, 0x00061000,
54 0x00066800, 0x00064000, 0x00060000, 0x00067000,
55 0x00063000, 0x00065800, 0x00060800, 0x00066000,
56 0x00063800, 0x00067800, 0x00062800, 0x00061800,
57 0x00032000, 0x00035000, 0x00034800, 0x00031000,
58 0x00036800, 0x00034000, 0x00030000, 0x00037000,
59 0x00033000, 0x00035800, 0x00030800, 0x00036000,
60 0x00033800, 0x00037800, 0x00032800, 0x00031800,
61 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
62 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
63 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
64 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
65 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
66 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
67 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
68 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
69 0x00052000, 0x00055000, 0x00054800, 0x00051000,
70 0x00056800, 0x00054000, 0x00050000, 0x00057000,
71 0x00053000, 0x00055800, 0x00050800, 0x00056000,
72 0x00053800, 0x00057800, 0x00052800, 0x00051800,
73 0x00012000, 0x00015000, 0x00014800, 0x00011000,
74 0x00016800, 0x00014000, 0x00010000, 0x00017000,
75 0x00013000, 0x00015800, 0x00010800, 0x00016000,
76 0x00013800, 0x00017800, 0x00012800, 0x00011800,
77 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
78 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
79 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
80 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
81 0x00042000, 0x00045000, 0x00044800, 0x00041000,
82 0x00046800, 0x00044000, 0x00040000, 0x00047000,
83 0x00043000, 0x00045800, 0x00040800, 0x00046000,
84 0x00043800, 0x00047800, 0x00042800, 0x00041800,
85 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
86 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
87 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
88 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
89 0x00002000, 0x00005000, 0x00004800, 0x00001000,
90 0x00006800, 0x00004000, 0x00000000, 0x00007000,
91 0x00003000, 0x00005800, 0x00000800, 0x00006000,
92 0x00003800, 0x00007800, 0x00002800, 0x00001800,
93 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
94 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
95 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
96 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
97 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
98 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
99 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
100 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
101 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
102 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
103 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
104 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
107 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
108 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
109 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
110 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
111 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
112 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
113 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
114 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
115 0x05280000, 0x05400000, 0x05080000, 0x05680000,
116 0x05500000, 0x05180000, 0x05200000, 0x05100000,
117 0x05700000, 0x05780000, 0x05600000, 0x05380000,
118 0x05300000, 0x05000000, 0x05480000, 0x05580000,
119 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
120 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
121 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
122 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
123 0x00280000, 0x00400000, 0x00080000, 0x00680000,
124 0x00500000, 0x00180000, 0x00200000, 0x00100000,
125 0x00700000, 0x00780000, 0x00600000, 0x00380000,
126 0x00300000, 0x00000000, 0x00480000, 0x00580000,
127 0x04280000, 0x04400000, 0x04080000, 0x04680000,
128 0x04500000, 0x04180000, 0x04200000, 0x04100000,
129 0x04700000, 0x04780000, 0x04600000, 0x04380000,
130 0x04300000, 0x04000000, 0x04480000, 0x04580000,
131 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
132 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
133 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
134 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
135 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
136 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
137 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
138 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
139 0x07280000, 0x07400000, 0x07080000, 0x07680000,
140 0x07500000, 0x07180000, 0x07200000, 0x07100000,
141 0x07700000, 0x07780000, 0x07600000, 0x07380000,
142 0x07300000, 0x07000000, 0x07480000, 0x07580000,
143 0x02280000, 0x02400000, 0x02080000, 0x02680000,
144 0x02500000, 0x02180000, 0x02200000, 0x02100000,
145 0x02700000, 0x02780000, 0x02600000, 0x02380000,
146 0x02300000, 0x02000000, 0x02480000, 0x02580000,
147 0x03280000, 0x03400000, 0x03080000, 0x03680000,
148 0x03500000, 0x03180000, 0x03200000, 0x03100000,
149 0x03700000, 0x03780000, 0x03600000, 0x03380000,
150 0x03300000, 0x03000000, 0x03480000, 0x03580000,
151 0x06280000, 0x06400000, 0x06080000, 0x06680000,
152 0x06500000, 0x06180000, 0x06200000, 0x06100000,
153 0x06700000, 0x06780000, 0x06600000, 0x06380000,
154 0x06300000, 0x06000000, 0x06480000, 0x06580000,
155 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
156 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
157 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
158 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
159 0x01280000, 0x01400000, 0x01080000, 0x01680000,
160 0x01500000, 0x01180000, 0x01200000, 0x01100000,
161 0x01700000, 0x01780000, 0x01600000, 0x01380000,
162 0x01300000, 0x01000000, 0x01480000, 0x01580000,
163 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
164 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
165 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
166 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
167 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
168 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
169 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
170 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
173 0x30000002, 0x60000002, 0x38000002, 0x08000002,
174 0x28000002, 0x78000002, 0x68000002, 0x40000002,
175 0x20000002, 0x50000002, 0x48000002, 0x70000002,
176 0x00000002, 0x18000002, 0x58000002, 0x10000002,
177 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
178 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
179 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
180 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
181 0x30000005, 0x60000005, 0x38000005, 0x08000005,
182 0x28000005, 0x78000005, 0x68000005, 0x40000005,
183 0x20000005, 0x50000005, 0x48000005, 0x70000005,
184 0x00000005, 0x18000005, 0x58000005, 0x10000005,
185 0x30000000, 0x60000000, 0x38000000, 0x08000000,
186 0x28000000, 0x78000000, 0x68000000, 0x40000000,
187 0x20000000, 0x50000000, 0x48000000, 0x70000000,
188 0x00000000, 0x18000000, 0x58000000, 0x10000000,
189 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
190 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
191 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
192 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
193 0x30000001, 0x60000001, 0x38000001, 0x08000001,
194 0x28000001, 0x78000001, 0x68000001, 0x40000001,
195 0x20000001, 0x50000001, 0x48000001, 0x70000001,
196 0x00000001, 0x18000001, 0x58000001, 0x10000001,
197 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
198 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
199 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
200 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
201 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
202 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
203 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
204 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
205 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
206 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
207 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
208 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
209 0x30000003, 0x60000003, 0x38000003, 0x08000003,
210 0x28000003, 0x78000003, 0x68000003, 0x40000003,
211 0x20000003, 0x50000003, 0x48000003, 0x70000003,
212 0x00000003, 0x18000003, 0x58000003, 0x10000003,
213 0x30000004, 0x60000004, 0x38000004, 0x08000004,
214 0x28000004, 0x78000004, 0x68000004, 0x40000004,
215 0x20000004, 0x50000004, 0x48000004, 0x70000004,
216 0x00000004, 0x18000004, 0x58000004, 0x10000004,
217 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
218 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
219 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
220 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
221 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
222 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
223 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
224 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
225 0x30000006, 0x60000006, 0x38000006, 0x08000006,
226 0x28000006, 0x78000006, 0x68000006, 0x40000006,
227 0x20000006, 0x50000006, 0x48000006, 0x70000006,
228 0x00000006, 0x18000006, 0x58000006, 0x10000006,
229 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
230 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
231 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
232 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
233 0x30000007, 0x60000007, 0x38000007, 0x08000007,
234 0x28000007, 0x78000007, 0x68000007, 0x40000007,
235 0x20000007, 0x50000007, 0x48000007, 0x70000007,
236 0x00000007, 0x18000007, 0x58000007, 0x10000007,
239 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
240 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
241 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
242 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
243 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
244 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
245 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
246 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
247 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
248 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
249 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
250 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
251 0x00000068, 0x00000058, 0x00000020, 0x00000008,
252 0x00000018, 0x00000078, 0x00000028, 0x00000048,
253 0x00000000, 0x00000050, 0x00000070, 0x00000038,
254 0x00000030, 0x00000040, 0x00000010, 0x00000060,
255 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
256 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
257 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
258 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
259 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
260 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
261 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
262 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
263 0x00000568, 0x00000558, 0x00000520, 0x00000508,
264 0x00000518, 0x00000578, 0x00000528, 0x00000548,
265 0x00000500, 0x00000550, 0x00000570, 0x00000538,
266 0x00000530, 0x00000540, 0x00000510, 0x00000560,
267 0x00000268, 0x00000258, 0x00000220, 0x00000208,
268 0x00000218, 0x00000278, 0x00000228, 0x00000248,
269 0x00000200, 0x00000250, 0x00000270, 0x00000238,
270 0x00000230, 0x00000240, 0x00000210, 0x00000260,
271 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
272 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
273 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
274 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
275 0x00000168, 0x00000158, 0x00000120, 0x00000108,
276 0x00000118, 0x00000178, 0x00000128, 0x00000148,
277 0x00000100, 0x00000150, 0x00000170, 0x00000138,
278 0x00000130, 0x00000140, 0x00000110, 0x00000160,
279 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
280 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
281 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
282 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
283 0x00000768, 0x00000758, 0x00000720, 0x00000708,
284 0x00000718, 0x00000778, 0x00000728, 0x00000748,
285 0x00000700, 0x00000750, 0x00000770, 0x00000738,
286 0x00000730, 0x00000740, 0x00000710, 0x00000760,
287 0x00000368, 0x00000358, 0x00000320, 0x00000308,
288 0x00000318, 0x00000378, 0x00000328, 0x00000348,
289 0x00000300, 0x00000350, 0x00000370, 0x00000338,
290 0x00000330, 0x00000340, 0x00000310, 0x00000360,
291 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
292 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
293 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
294 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
295 0x00000468, 0x00000458, 0x00000420, 0x00000408,
296 0x00000418, 0x00000478, 0x00000428, 0x00000448,
297 0x00000400, 0x00000450, 0x00000470, 0x00000438,
298 0x00000430, 0x00000440, 0x00000410, 0x00000460,
299 0x00000668, 0x00000658, 0x00000620, 0x00000608,
300 0x00000618, 0x00000678, 0x00000628, 0x00000648,
301 0x00000600, 0x00000650, 0x00000670, 0x00000638,
302 0x00000630, 0x00000640, 0x00000610, 0x00000660,
307 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
311 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
314 #define round(k1,k2,tbl) \
318 l ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \
319 BOX ((t >> 8) & 0xff, 1, tbl) ^ \
320 BOX ((t >> 16) & 0xff, 2, tbl) ^ \
321 BOX ((t >> 24) & 0xff, 3, tbl); \
323 r ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \
324 BOX ((t >> 8) & 0xff, 1, tbl) ^ \
325 BOX ((t >> 16) & 0xff, 2, tbl) ^ \
326 BOX ((t >> 24) & 0xff, 3, tbl); \
329 #define R(k,h,s,i,t) \
335 round (k[0], k[1], t); \
336 round (k[2], k[3], t); \
337 round (k[4], k[5], t); \
338 round (k[6], k[7], t); \
339 round (k[0], k[1], t); \
340 round (k[2], k[3], t); \
341 round (k[4], k[5], t); \
342 round (k[6], k[7], t); \
343 round (k[0], k[1], t); \
344 round (k[2], k[3], t); \
345 round (k[4], k[5], t); \
346 round (k[6], k[7], t); \
347 round (k[7], k[6], t); \
348 round (k[5], k[4], t); \
349 round (k[3], k[2], t); \
350 round (k[1], k[0], t); \
356 w[0] = u[0] ^ v[0]; \
357 w[1] = u[1] ^ v[1]; \
358 w[2] = u[2] ^ v[2]; \
359 w[3] = u[3] ^ v[3]; \
360 w[4] = u[4] ^ v[4]; \
361 w[5] = u[5] ^ v[5]; \
362 w[6] = u[6] ^ v[6]; \
366 k[0] = ((w[0] & 0x000000ff) << 0) \
367 | ((w[2] & 0x000000ff) << 8) \
368 | ((w[4] & 0x000000ff) << 16) \
369 | ((w[6] & 0x000000ff) << 24); \
370 k[1] = ((w[0] & 0x0000ff00) >> 8) \
371 | ((w[2] & 0x0000ff00) >> 0) \
372 | ((w[4] & 0x0000ff00) << 8) \
373 | ((w[6] & 0x0000ff00) << 16); \
374 k[2] = ((w[0] & 0x00ff0000) >> 16) \
375 | ((w[2] & 0x00ff0000) >> 8) \
376 | ((w[4] & 0x00ff0000) << 0) \
377 | ((w[6] & 0x00ff0000) << 8); \
378 k[3] = ((w[0] & 0xff000000) >> 24) \
379 | ((w[2] & 0xff000000) >> 16) \
380 | ((w[4] & 0xff000000) >> 8) \
381 | ((w[6] & 0xff000000) >> 0); \
382 k[4] = ((w[1] & 0x000000ff) << 0) \
383 | ((w[3] & 0x000000ff) << 8) \
384 | ((w[5] & 0x000000ff) << 16) \
385 | ((w[7] & 0x000000ff) << 24); \
386 k[5] = ((w[1] & 0x0000ff00) >> 8) \
387 | ((w[3] & 0x0000ff00) >> 0) \
388 | ((w[5] & 0x0000ff00) << 8) \
389 | ((w[7] & 0x0000ff00) << 16); \
390 k[6] = ((w[1] & 0x00ff0000) >> 16) \
391 | ((w[3] & 0x00ff0000) >> 8) \
392 | ((w[5] & 0x00ff0000) << 0) \
393 | ((w[7] & 0x00ff0000) << 8); \
394 k[7] = ((w[1] & 0xff000000) >> 24) \
395 | ((w[3] & 0xff000000) >> 16) \
396 | ((w[5] & 0xff000000) >> 8) \
397 | ((w[7] & 0xff000000) >> 0);
434 x[0] ^= 0xff00ff00; \
435 x[1] ^= 0xff00ff00; \
436 x[2] ^= 0x00ff00ff; \
437 x[3] ^= 0x00ff00ff; \
438 x[4] ^= 0x00ffff00; \
439 x[5] ^= 0xff0000ff; \
440 x[6] ^= 0x000000ff; \
443 #define SHIFT12(u,m,s) \
444 u[0] = m[0] ^ s[6]; \
445 u[1] = m[1] ^ s[7]; \
446 u[2] = m[2] ^ (s[0] << 16) \
448 ^ (s[0] & 0x0000ffff) \
449 ^ (s[1] & 0x0000ffff) \
454 ^ (s[7] & 0xffff0000) \
456 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
458 ^ (s[1] & 0x0000ffff) \
467 ^ (s[7] & 0x0000ffff) \
470 u[4] = m[4] ^ (s[0] & 0xffff0000) \
473 ^ (s[1] & 0xffff0000) \
482 ^ (s[7] & 0x0000ffff) \
485 u[5] = m[5] ^ (s[0] << 16) \
487 ^ (s[0] & 0xffff0000) \
488 ^ (s[1] & 0x0000ffff) \
498 ^ (s[7] & 0xffff0000) \
514 u[7] = m[7] ^ (s[0] & 0xffff0000) \
516 ^ (s[1] & 0x0000ffff) \
525 ^ (s[7] & 0x0000ffff) \
529 #define SHIFT16(h,v,u) \
530 v[0] = h[0] ^ (u[1] << 16) \
532 v[1] = h[1] ^ (u[2] << 16) \
534 v[2] = h[2] ^ (u[3] << 16) \
536 v[3] = h[3] ^ (u[4] << 16) \
538 v[4] = h[4] ^ (u[5] << 16) \
540 v[5] = h[5] ^ (u[6] << 16) \
542 v[6] = h[6] ^ (u[7] << 16) \
544 v[7] = h[7] ^ (u[0] & 0xffff0000) \
547 ^ (u[1] & 0xffff0000) \
550 ^ (u[7] & 0xffff0000);
552 #define SHIFT61(h,v) \
553 h[0] = (v[0] & 0xffff0000) \
557 ^ (v[1] & 0xffff0000) \
566 ^ (v[7] & 0x0000ffff); \
567 h[1] = (v[0] << 16) \
569 ^ (v[0] & 0xffff0000) \
570 ^ (v[1] & 0x0000ffff) \
578 ^ (v[7] & 0xffff0000) \
580 h[2] = (v[0] & 0x0000ffff) \
584 ^ (v[1] & 0xffff0000) \
592 ^ (v[7] & 0x0000ffff) \
595 h[3] = (v[0] << 16) \
597 ^ (v[0] & 0xffff0000) \
598 ^ (v[1] & 0xffff0000) \
608 ^ (v[7] & 0x0000ffff) \
610 h[4] = (v[0] >> 16) \
624 h[5] = (v[0] << 16) \
625 ^ (v[0] & 0xffff0000) \
628 ^ (v[1] & 0xffff0000) \
642 ^ (v[7] & 0xffff0000); \
674 #define PASS0(h,s,u,v,t) \
685 #define PASS2(h,s,u,v,t) \
697 #define PASS4(h,s,u,v,t) \
708 #define PASS6(h,s,u,v,t) \
717 __device__ __constant__ comb_t c_combs[1024];
719 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
725 const u32 lid = threadIdx.x;
731 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
735 wordl0[0] = pws[gid].i[ 0];
736 wordl0[1] = pws[gid].i[ 1];
737 wordl0[2] = pws[gid].i[ 2];
738 wordl0[3] = pws[gid].i[ 3];
742 wordl1[0] = pws[gid].i[ 4];
743 wordl1[1] = pws[gid].i[ 5];
744 wordl1[2] = pws[gid].i[ 6];
745 wordl1[3] = pws[gid].i[ 7];
761 const u32 pw_l_len = pws[gid].pw_len;
763 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
765 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
772 __shared__ u32 s_tables[4][256];
774 s_tables[0][lid] = c_tables[0][lid];
775 s_tables[1][lid] = c_tables[1][lid];
776 s_tables[2][lid] = c_tables[2][lid];
777 s_tables[3][lid] = c_tables[3][lid];
781 if (gid >= gid_max) return;
787 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
789 const u32 pw_r_len = c_combs[il_pos].pw_len;
791 const u32 pw_len = pw_l_len + pw_r_len;
795 wordr0[0] = c_combs[il_pos].i[0];
796 wordr0[1] = c_combs[il_pos].i[1];
797 wordr0[2] = c_combs[il_pos].i[2];
798 wordr0[3] = c_combs[il_pos].i[3];
802 wordr1[0] = c_combs[il_pos].i[4];
803 wordr1[1] = c_combs[il_pos].i[5];
804 wordr1[2] = c_combs[il_pos].i[6];
805 wordr1[3] = c_combs[il_pos].i[7];
821 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
823 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
828 w0[0] = wordl0[0] | wordr0[0];
829 w0[1] = wordl0[1] | wordr0[1];
830 w0[2] = wordl0[2] | wordr0[2];
831 w0[3] = wordl0[3] | wordr0[3];
835 w1[0] = wordl1[0] | wordr1[0];
836 w1[1] = wordl1[1] | wordr1[1];
837 w1[2] = wordl1[2] | wordr1[2];
838 w1[3] = wordl1[3] | wordr1[3];
842 w2[0] = wordl2[0] | wordr2[0];
843 w2[1] = wordl2[1] | wordr2[1];
844 w2[2] = wordl2[2] | wordr2[2];
845 w2[3] = wordl2[3] | wordr2[3];
849 w3[0] = wordl3[0] | wordr3[0];
850 w3[1] = wordl3[1] | wordr3[1];
854 const u32 w14 = pw_len * 8;
891 state_m[0] = state[0];
892 state_m[1] = state[1];
893 state_m[2] = state[2];
894 state_m[3] = state[3];
895 state_m[4] = state[4];
896 state_m[5] = state[5];
897 state_m[6] = state[6];
898 state_m[7] = state[7];
911 PASS0 (state, tmp, state_m, data_m, s_tables);
912 PASS2 (state, tmp, state_m, data_m, s_tables);
913 PASS4 (state, tmp, state_m, data_m, s_tables);
914 PASS6 (state, tmp, state_m, data_m, s_tables);
916 SHIFT12 (state_m, data, tmp);
917 SHIFT16 (state, data_m, state_m);
918 SHIFT61 (state, data_m);
931 state_m[0] = state[0];
932 state_m[1] = state[1];
933 state_m[2] = state[2];
934 state_m[3] = state[3];
935 state_m[4] = state[4];
936 state_m[5] = state[5];
937 state_m[6] = state[6];
938 state_m[7] = state[7];
949 PASS0 (state, tmp, state_m, data_m, s_tables);
950 PASS2 (state, tmp, state_m, data_m, s_tables);
951 PASS4 (state, tmp, state_m, data_m, s_tables);
952 PASS6 (state, tmp, state_m, data_m, s_tables);
954 SHIFT12 (state_m, data, tmp);
955 SHIFT16 (state, data_m, state_m);
956 SHIFT61 (state, data_m);
969 state_m[0] = state[0];
970 state_m[1] = state[1];
971 state_m[2] = state[2];
972 state_m[3] = state[3];
973 state_m[4] = state[4];
974 state_m[5] = state[5];
975 state_m[6] = state[6];
976 state_m[7] = state[7];
987 PASS0 (state, tmp, state_m, data_m, s_tables);
988 PASS2 (state, tmp, state_m, data_m, s_tables);
989 PASS4 (state, tmp, state_m, data_m, s_tables);
990 PASS6 (state, tmp, state_m, data_m, s_tables);
992 SHIFT12 (state_m, data, tmp);
993 SHIFT16 (state, data_m, state_m);
994 SHIFT61 (state, data_m);
998 const u32x r0 = state[0];
999 const u32x r1 = state[1];
1000 const u32x r2 = state[2];
1001 const u32x r3 = state[3];
1003 #include VECT_COMPARE_M
1007 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1011 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1015 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1021 const u32 lid = threadIdx.x;
1027 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1031 wordl0[0] = pws[gid].i[ 0];
1032 wordl0[1] = pws[gid].i[ 1];
1033 wordl0[2] = pws[gid].i[ 2];
1034 wordl0[3] = pws[gid].i[ 3];
1038 wordl1[0] = pws[gid].i[ 4];
1039 wordl1[1] = pws[gid].i[ 5];
1040 wordl1[2] = pws[gid].i[ 6];
1041 wordl1[3] = pws[gid].i[ 7];
1057 const u32 pw_l_len = pws[gid].pw_len;
1059 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
1061 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
1068 __shared__ u32 s_tables[4][256];
1070 s_tables[0][lid] = c_tables[0][lid];
1071 s_tables[1][lid] = c_tables[1][lid];
1072 s_tables[2][lid] = c_tables[2][lid];
1073 s_tables[3][lid] = c_tables[3][lid];
1077 if (gid >= gid_max) return;
1083 const u32 search[4] =
1085 digests_buf[digests_offset].digest_buf[DGST_R0],
1086 digests_buf[digests_offset].digest_buf[DGST_R1],
1087 digests_buf[digests_offset].digest_buf[DGST_R2],
1088 digests_buf[digests_offset].digest_buf[DGST_R3]
1095 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
1097 const u32 pw_r_len = c_combs[il_pos].pw_len;
1099 const u32 pw_len = pw_l_len + pw_r_len;
1103 wordr0[0] = c_combs[il_pos].i[0];
1104 wordr0[1] = c_combs[il_pos].i[1];
1105 wordr0[2] = c_combs[il_pos].i[2];
1106 wordr0[3] = c_combs[il_pos].i[3];
1110 wordr1[0] = c_combs[il_pos].i[4];
1111 wordr1[1] = c_combs[il_pos].i[5];
1112 wordr1[2] = c_combs[il_pos].i[6];
1113 wordr1[3] = c_combs[il_pos].i[7];
1129 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
1131 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
1136 w0[0] = wordl0[0] | wordr0[0];
1137 w0[1] = wordl0[1] | wordr0[1];
1138 w0[2] = wordl0[2] | wordr0[2];
1139 w0[3] = wordl0[3] | wordr0[3];
1143 w1[0] = wordl1[0] | wordr1[0];
1144 w1[1] = wordl1[1] | wordr1[1];
1145 w1[2] = wordl1[2] | wordr1[2];
1146 w1[3] = wordl1[3] | wordr1[3];
1150 w2[0] = wordl2[0] | wordr2[0];
1151 w2[1] = wordl2[1] | wordr2[1];
1152 w2[2] = wordl2[2] | wordr2[2];
1153 w2[3] = wordl2[3] | wordr2[3];
1157 w3[0] = wordl3[0] | wordr3[0];
1158 w3[1] = wordl3[1] | wordr3[1];
1162 const u32 w14 = pw_len * 8;
1185 state[ 8] = data[0];
1186 state[ 9] = data[1];
1187 state[10] = data[2];
1188 state[11] = data[3];
1189 state[12] = data[4];
1190 state[13] = data[5];
1191 state[14] = data[6];
1192 state[15] = data[7];
1199 state_m[0] = state[0];
1200 state_m[1] = state[1];
1201 state_m[2] = state[2];
1202 state_m[3] = state[3];
1203 state_m[4] = state[4];
1204 state_m[5] = state[5];
1205 state_m[6] = state[6];
1206 state_m[7] = state[7];
1208 data_m[0] = data[0];
1209 data_m[1] = data[1];
1210 data_m[2] = data[2];
1211 data_m[3] = data[3];
1212 data_m[4] = data[4];
1213 data_m[5] = data[5];
1214 data_m[6] = data[6];
1215 data_m[7] = data[7];
1219 PASS0 (state, tmp, state_m, data_m, s_tables);
1220 PASS2 (state, tmp, state_m, data_m, s_tables);
1221 PASS4 (state, tmp, state_m, data_m, s_tables);
1222 PASS6 (state, tmp, state_m, data_m, s_tables);
1224 SHIFT12 (state_m, data, tmp);
1225 SHIFT16 (state, data_m, state_m);
1226 SHIFT61 (state, data_m);
1239 state_m[0] = state[0];
1240 state_m[1] = state[1];
1241 state_m[2] = state[2];
1242 state_m[3] = state[3];
1243 state_m[4] = state[4];
1244 state_m[5] = state[5];
1245 state_m[6] = state[6];
1246 state_m[7] = state[7];
1248 data_m[0] = data[0];
1249 data_m[1] = data[1];
1250 data_m[2] = data[2];
1251 data_m[3] = data[3];
1252 data_m[4] = data[4];
1253 data_m[5] = data[5];
1254 data_m[6] = data[6];
1255 data_m[7] = data[7];
1257 PASS0 (state, tmp, state_m, data_m, s_tables);
1258 PASS2 (state, tmp, state_m, data_m, s_tables);
1259 PASS4 (state, tmp, state_m, data_m, s_tables);
1260 PASS6 (state, tmp, state_m, data_m, s_tables);
1262 SHIFT12 (state_m, data, tmp);
1263 SHIFT16 (state, data_m, state_m);
1264 SHIFT61 (state, data_m);
1268 data[0] = state[ 8];
1269 data[1] = state[ 9];
1270 data[2] = state[10];
1271 data[3] = state[11];
1272 data[4] = state[12];
1273 data[5] = state[13];
1274 data[6] = state[14];
1275 data[7] = state[15];
1277 state_m[0] = state[0];
1278 state_m[1] = state[1];
1279 state_m[2] = state[2];
1280 state_m[3] = state[3];
1281 state_m[4] = state[4];
1282 state_m[5] = state[5];
1283 state_m[6] = state[6];
1284 state_m[7] = state[7];
1286 data_m[0] = data[0];
1287 data_m[1] = data[1];
1288 data_m[2] = data[2];
1289 data_m[3] = data[3];
1290 data_m[4] = data[4];
1291 data_m[5] = data[5];
1292 data_m[6] = data[6];
1293 data_m[7] = data[7];
1295 PASS0 (state, tmp, state_m, data_m, s_tables);
1296 PASS2 (state, tmp, state_m, data_m, s_tables);
1297 PASS4 (state, tmp, state_m, data_m, s_tables);
1298 PASS6 (state, tmp, state_m, data_m, s_tables);
1300 SHIFT12 (state_m, data, tmp);
1301 SHIFT16 (state, data_m, state_m);
1302 SHIFT61 (state, data_m);
1306 const u32x r0 = state[0];
1307 const u32x r1 = state[1];
1308 const u32x r2 = state[2];
1309 const u32x r3 = state[3];
1311 #include VECT_COMPARE_S
1315 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1319 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)