/**
 * Authors.....: Jens Steube <jens.steube@gmail.com>
 *               magnum <john.magnum@hushmail.com>
 *
 * License.....: MIT
 */

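// SIMD helpers for the OpenCL kernels: per-lane digest comparison
// (COMPARE_S_SIMD / COMPARE_M_SIMD), lane match tests (MATCHES_ONE_*,
// MATCHES_NONE_*) and vector gather/scatter helpers (ix_create_*,
// packv/unpackv and friends), all specialized at compile time on the
// vector width VECT_SIZE.
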
// vliw1

#if VECT_SIZE == 1

#define MATCHES_ONE_VV(a,b) ((a) == (b))
#define MATCHES_ONE_VS(a,b) ((a) == (b))

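// COMPARE_S_SIMD: single-target check. Each lane is compared against
// the one searched digest in search[0..3]; the atomic_inc on
// hashes_shown[] ensures a cracked hash is reported only once, and
// mark_hash() records which candidate (gid, il_pos) produced the hit.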
#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0) == search[0]) && ((h1) == search[1]) && ((h2) == search[2]) && ((h3) == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos); \
    } \
  } \
}

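// COMPARE_M_SIMD: multi-hash check. Each lane's digest first passes the
// two-level bitmap filter (check()); only on a filter hit does
// find_hash() search the current salt's slice of digests_buf, and a
// real match is again reported exactly once via atomic_inc.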
#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0, h1, h2, h3 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos); \
      } \
    } \
  } \
}

#endif

// vliw2

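// vector_accessible: tail guard for vectorized batches. When il_cnt is
// not a multiple of VECT_SIZE the last vector is only partially filled,
// so lane e is valid only while (il_pos + e) < il_cnt.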
#define vector_accessible(p,c,e) (((p) + (e)) < (c))

#if VECT_SIZE == 2

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 0); \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 1); \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 0); \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 1); \
      } \
    } \
  } \
}

#endif

// vliw4

#if VECT_SIZE == 4

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ) || ((a).s2 == (b) ) || ((a).s3 == (b) ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 0); \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 1); \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 2); \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 3); \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 0); \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 1); \
      } \
    } \
  } \
  \
  if (check (digest_tp2, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 2); \
      } \
    } \
  } \
  \
  if (check (digest_tp3, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 3); \
      } \
    } \
  } \
}

#endif

// vliw8

#if VECT_SIZE == 8

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3) || ((a).s4 == (b).s4) || ((a).s5 == (b).s5) || ((a).s6 == (b).s6) || ((a).s7 == (b).s7))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ) || ((a).s2 == (b) ) || ((a).s3 == (b) ) || ((a).s4 == (b) ) || ((a).s5 == (b) ) || ((a).s6 == (b) ) || ((a).s7 == (b) ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 0); \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 1); \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 2); \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 3); \
    } \
  } \
  \
  if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 4); \
    } \
  } \
  \
  if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 5); \
    } \
  } \
  \
  if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 6); \
    } \
  } \
  \
  if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 7); \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  const u32 digest_tp4[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
  const u32 digest_tp5[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
  const u32 digest_tp6[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
  const u32 digest_tp7[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 0); \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 1); \
      } \
    } \
  } \
  \
  if (check (digest_tp2, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 2); \
      } \
    } \
  } \
  \
  if (check (digest_tp3, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 3); \
      } \
    } \
  } \
  \
  if (check (digest_tp4, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp4, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 4); \
      } \
    } \
  } \
  \
  if (check (digest_tp5, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp5, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 5); \
      } \
    } \
  } \
  \
  if (check (digest_tp6, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp6, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 6); \
      } \
    } \
  } \
  \
  if (check (digest_tp7, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp7, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 7); \
      } \
    } \
  } \
}

#endif

// vliw16

#if VECT_SIZE == 16

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3) || ((a).s4 == (b).s4) || ((a).s5 == (b).s5) || ((a).s6 == (b).s6) || ((a).s7 == (b).s7) || ((a).s8 == (b).s8) || ((a).s9 == (b).s9) || ((a).sa == (b).sa) || ((a).sb == (b).sb) || ((a).sc == (b).sc) || ((a).sd == (b).sd) || ((a).se == (b).se) || ((a).sf == (b).sf))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b) ) || ((a).s1 == (b) ) || ((a).s2 == (b) ) || ((a).s3 == (b) ) || ((a).s4 == (b) ) || ((a).s5 == (b) ) || ((a).s6 == (b) ) || ((a).s7 == (b) ) || ((a).s8 == (b) ) || ((a).s9 == (b) ) || ((a).sa == (b) ) || ((a).sb == (b) ) || ((a).sc == (b) ) || ((a).sd == (b) ) || ((a).se == (b) ) || ((a).sf == (b) ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 0); \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 1); \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 2); \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 3); \
    } \
  } \
  \
  if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 4); \
    } \
  } \
  \
  if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 5); \
    } \
  } \
  \
  if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 6); \
    } \
  } \
  \
  if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 7); \
    } \
  } \
  \
  if (((h0).s8 == search[0]) && ((h1).s8 == search[1]) && ((h2).s8 == search[2]) && ((h3).s8 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 8) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 8); \
    } \
  } \
  \
  if (((h0).s9 == search[0]) && ((h1).s9 == search[1]) && ((h2).s9 == search[2]) && ((h3).s9 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 9) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 9); \
    } \
  } \
  \
  if (((h0).sa == search[0]) && ((h1).sa == search[1]) && ((h2).sa == search[2]) && ((h3).sa == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 10) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 10); \
    } \
  } \
  \
  if (((h0).sb == search[0]) && ((h1).sb == search[1]) && ((h2).sb == search[2]) && ((h3).sb == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 11) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 11); \
    } \
  } \
  \
  if (((h0).sc == search[0]) && ((h1).sc == search[1]) && ((h2).sc == search[2]) && ((h3).sc == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 12) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 12); \
    } \
  } \
  \
  if (((h0).sd == search[0]) && ((h1).sd == search[1]) && ((h2).sd == search[2]) && ((h3).sd == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 13) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 13); \
    } \
  } \
  \
  if (((h0).se == search[0]) && ((h1).se == search[1]) && ((h2).se == search[2]) && ((h3).se == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 14) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 14); \
    } \
  } \
  \
  if (((h0).sf == search[0]) && ((h1).sf == search[1]) && ((h2).sf == search[2]) && ((h3).sf == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 15) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
    { \
      mark_hash (plains_buf, d_return_buf, salt_pos, 0, final_hash_pos, gid, il_pos + 15); \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp00[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp01[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp02[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp03[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  const u32 digest_tp04[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
  const u32 digest_tp05[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
  const u32 digest_tp06[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
  const u32 digest_tp07[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
  const u32 digest_tp08[4] = { h0.s8, h1.s8, h2.s8, h3.s8 }; \
  const u32 digest_tp09[4] = { h0.s9, h1.s9, h2.s9, h3.s9 }; \
  const u32 digest_tp10[4] = { h0.sa, h1.sa, h2.sa, h3.sa }; \
  const u32 digest_tp11[4] = { h0.sb, h1.sb, h2.sb, h3.sb }; \
  const u32 digest_tp12[4] = { h0.sc, h1.sc, h2.sc, h3.sc }; \
  const u32 digest_tp13[4] = { h0.sd, h1.sd, h2.sd, h3.sd }; \
  const u32 digest_tp14[4] = { h0.se, h1.se, h2.se, h3.se }; \
  const u32 digest_tp15[4] = { h0.sf, h1.sf, h2.sf, h3.sf }; \
  \
  if (check (digest_tp00, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp00, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 0); \
      } \
    } \
  } \
  \
  if (check (digest_tp01, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp01, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 1); \
      } \
    } \
  } \
  \
  if (check (digest_tp02, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp02, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 2); \
      } \
    } \
  } \
  \
  if (check (digest_tp03, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp03, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 3); \
      } \
    } \
  } \
  \
  if (check (digest_tp04, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp04, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 4); \
      } \
    } \
  } \
  \
  if (check (digest_tp05, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp05, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 5); \
      } \
    } \
  } \
  \
  if (check (digest_tp06, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp06, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 6); \
      } \
    } \
  } \
  \
  if (check (digest_tp07, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp07, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 7); \
      } \
    } \
  } \
  \
  if (check (digest_tp08, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp08, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 8) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 8); \
      } \
    } \
  } \
  \
  if (check (digest_tp09, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp09, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 9) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 9); \
      } \
    } \
  } \
  \
  if (check (digest_tp10, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp10, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 10) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 10); \
      } \
    } \
  } \
  \
  if (check (digest_tp11, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp11, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 11) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 11); \
      } \
    } \
  } \
  \
  if (check (digest_tp12, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp12, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 12) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 12); \
      } \
    } \
  } \
  \
  if (check (digest_tp13, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp13, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 13) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 13); \
      } \
    } \
  } \
  \
  if (check (digest_tp14, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp14, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 14) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 14); \
      } \
    } \
  } \
  \
  if (check (digest_tp15, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int digest_pos = find_hash (digest_tp15, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (digest_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + digest_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 15) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \
      { \
        mark_hash (plains_buf, d_return_buf, salt_pos, digest_pos, final_hash_pos, gid, il_pos + 15); \
      } \
    } \
  } \
}

#endif

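// Negated forms; kernels can use these to reject a whole vector of
// candidates early when no lane matches.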
#define MATCHES_NONE_VV(a,b) !(MATCHES_ONE_VV ((a), (b)))
#define MATCHES_NONE_VS(a,b) !(MATCHES_ONE_VS ((a), (b)))

// attack-mode 0

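// Gather the .i word of VECT_SIZE consecutive bf_t entries into a
// single u32x, so one work-item processes VECT_SIZE candidates at once.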
inline u32x ix_create_bft (__global bf_t *bfs_buf, const u32 il_pos)
{
#if   VECT_SIZE == 1
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i);
#elif VECT_SIZE == 2
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
#elif VECT_SIZE == 4
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
#elif VECT_SIZE == 8
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
#elif VECT_SIZE == 16
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i);
#endif

  return ix;
}

// attack-mode 1

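// Same gather pattern for combinator entries: the pw_len and i[idx]
// words of VECT_SIZE consecutive comb_t entries packed into one u32x.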
inline u32x pwlenx_create_combt (__global comb_t *combs_buf, const u32 il_pos)
{
#if   VECT_SIZE == 1
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len);
#elif VECT_SIZE == 2
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len);
#elif VECT_SIZE == 4
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len);
#elif VECT_SIZE == 8
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len);
#elif VECT_SIZE == 16
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len);
#endif

  return pw_lenx;
}

inline u32x ix_create_combt (__global comb_t *combs_buf, const u32 il_pos, const int idx)
{
#if   VECT_SIZE == 1
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx]);
#elif VECT_SIZE == 2
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]);
#elif VECT_SIZE == 4
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]);
#elif VECT_SIZE == 8
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]);
#elif VECT_SIZE == 16
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]);
#endif

  return ix;
}

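// packv/packvf gather a struct member from VECT_SIZE consecutive array
// elements into a u32x (pack64v/pack64vf likewise into a u64x);
// unpackv scatters a u32x back, one lane per element. The element
// index is (gid * VECT_SIZE) + lane.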
#if   VECT_SIZE == 1
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 1) + 0].var[(idx)])
#elif VECT_SIZE == 2
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 2) + 0].var[(idx)], (arr)[((gid) * 2) + 1].var[(idx)])
#elif VECT_SIZE == 4
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 4) + 0].var[(idx)], (arr)[((gid) * 4) + 1].var[(idx)], (arr)[((gid) * 4) + 2].var[(idx)], (arr)[((gid) * 4) + 3].var[(idx)])
#elif VECT_SIZE == 8
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)])
#elif VECT_SIZE == 16
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
#endif

#if   VECT_SIZE == 1
#define packvf(arr,var,gid) (u32x) ((arr)[((gid) * 1) + 0].var)
#elif VECT_SIZE == 2
#define packvf(arr,var,gid) (u32x) ((arr)[((gid) * 2) + 0].var, (arr)[((gid) * 2) + 1].var)
#elif VECT_SIZE == 4
#define packvf(arr,var,gid) (u32x) ((arr)[((gid) * 4) + 0].var, (arr)[((gid) * 4) + 1].var, (arr)[((gid) * 4) + 2].var, (arr)[((gid) * 4) + 3].var)
#elif VECT_SIZE == 8
#define packvf(arr,var,gid) (u32x) ((arr)[((gid) * 8) + 0].var, (arr)[((gid) * 8) + 1].var, (arr)[((gid) * 8) + 2].var, (arr)[((gid) * 8) + 3].var, (arr)[((gid) * 8) + 4].var, (arr)[((gid) * 8) + 5].var, (arr)[((gid) * 8) + 6].var, (arr)[((gid) * 8) + 7].var)
#elif VECT_SIZE == 16
#define packvf(arr,var,gid) (u32x) ((arr)[((gid) * 16) + 0].var, (arr)[((gid) * 16) + 1].var, (arr)[((gid) * 16) + 2].var, (arr)[((gid) * 16) + 3].var, (arr)[((gid) * 16) + 4].var, (arr)[((gid) * 16) + 5].var, (arr)[((gid) * 16) + 6].var, (arr)[((gid) * 16) + 7].var, (arr)[((gid) * 16) + 8].var, (arr)[((gid) * 16) + 9].var, (arr)[((gid) * 16) + 10].var, (arr)[((gid) * 16) + 11].var, (arr)[((gid) * 16) + 12].var, (arr)[((gid) * 16) + 13].var, (arr)[((gid) * 16) + 14].var, (arr)[((gid) * 16) + 15].var)
#endif

#if   VECT_SIZE == 1
#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) * 1) + 0].var[(idx)])
#elif VECT_SIZE == 2
#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) * 2) + 0].var[(idx)], (arr)[((gid) * 2) + 1].var[(idx)])
#elif VECT_SIZE == 4
#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) * 4) + 0].var[(idx)], (arr)[((gid) * 4) + 1].var[(idx)], (arr)[((gid) * 4) + 2].var[(idx)], (arr)[((gid) * 4) + 3].var[(idx)])
#elif VECT_SIZE == 8
#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)])
#elif VECT_SIZE == 16
#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
#endif

#if   VECT_SIZE == 1
#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) * 1) + 0].var)
#elif VECT_SIZE == 2
#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) * 2) + 0].var, (arr)[((gid) * 2) + 1].var)
#elif VECT_SIZE == 4
#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) * 4) + 0].var, (arr)[((gid) * 4) + 1].var, (arr)[((gid) * 4) + 2].var, (arr)[((gid) * 4) + 3].var)
#elif VECT_SIZE == 8
#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) * 8) + 0].var, (arr)[((gid) * 8) + 1].var, (arr)[((gid) * 8) + 2].var, (arr)[((gid) * 8) + 3].var, (arr)[((gid) * 8) + 4].var, (arr)[((gid) * 8) + 5].var, (arr)[((gid) * 8) + 6].var, (arr)[((gid) * 8) + 7].var)
#elif VECT_SIZE == 16
#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) * 16) + 0].var, (arr)[((gid) * 16) + 1].var, (arr)[((gid) * 16) + 2].var, (arr)[((gid) * 16) + 3].var, (arr)[((gid) * 16) + 4].var, (arr)[((gid) * 16) + 5].var, (arr)[((gid) * 16) + 6].var, (arr)[((gid) * 16) + 7].var, (arr)[((gid) * 16) + 8].var, (arr)[((gid) * 16) + 9].var, (arr)[((gid) * 16) + 10].var, (arr)[((gid) * 16) + 11].var, (arr)[((gid) * 16) + 12].var, (arr)[((gid) * 16) + 13].var, (arr)[((gid) * 16) + 14].var, (arr)[((gid) * 16) + 15].var)
#endif

#if   VECT_SIZE == 1
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 1) + 0].var[(idx)] = val;
#elif VECT_SIZE == 2
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 2) + 0].var[(idx)] = val.s0; (arr)[((gid) * 2) + 1].var[(idx)] = val.s1;
#elif VECT_SIZE == 4
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 4) + 0].var[(idx)] = val.s0; (arr)[((gid) * 4) + 1].var[(idx)] = val.s1; (arr)[((gid) * 4) + 2].var[(idx)] = val.s2; (arr)[((gid) * 4) + 3].var[(idx)] = val.s3;
#elif VECT_SIZE == 8
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 8) + 0].var[(idx)] = val.s0; (arr)[((gid) * 8) + 1].var[(idx)] = val.s1; (arr)[((gid) * 8) + 2].var[(idx)] = val.s2; (arr)[((gid) * 8) + 3].var[(idx)] = val.s3; (arr)[((gid) * 8) + 4].var[(idx)] = val.s4; (arr)[((gid) * 8) + 5].var[(idx)] = val.s5; (arr)[((gid) * 8) + 6].var[(idx)] = val.s6; (arr)[((gid) * 8) + 7].var[(idx)] = val.s7;
#elif VECT_SIZE == 16
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] = val.s0; (arr)[((gid) * 16) + 1].var[(idx)] = val.s1; (arr)[((gid) * 16) + 2].var[(idx)] = val.s2; (arr)[((gid) * 16) + 3].var[(idx)] = val.s3; (arr)[((gid) * 16) + 4].var[(idx)] = val.s4; (arr)[((gid) * 16) + 5].var[(idx)] = val.s5; (arr)[((gid) * 16) + 6].var[(idx)] = val.s6; (arr)[((gid) * 16) + 7].var[(idx)] = val.s7; (arr)[((gid) * 16) + 8].var[(idx)] = val.s8; (arr)[((gid) * 16) + 9].var[(idx)] = val.s9; (arr)[((gid) * 16) + 10].var[(idx)] = val.sa; (arr)[((gid) * 16) + 11].var[(idx)] = val.sb; (arr)[((gid) * 16) + 12].var[(idx)] = val.sc; (arr)[((gid) * 16) + 13].var[(idx)] = val.sd; (arr)[((gid) * 16) + 14].var[(idx)] = val.se; (arr)[((gid) * 16) + 15].var[(idx)] = val.sf;
#endif
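
// Illustrative sketch (not part of the original header): a kernel inner
// loop would typically consume these helpers roughly as follows. The
// surrounding buffer and register names (bfs_buf, r0..r3) are
// assumptions for illustration only.
//
//   for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
//   {
//     const u32x w0r = ix_create_bft (bfs_buf, il_pos); // attack-mode 0
//
//     // ... compute the per-lane digest words r0, r1, r2, r3 (u32x) ...
//
//     COMPARE_M_SIMD (r0, r1, r2, r3); // or COMPARE_S_SIMD in -S kernels
//   }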