/**
 * Authors.....: Jens Steube <jens.steube@gmail.com>
 *               magnum <john.magnum@hushmail.com>
 *
 * License.....: MIT
 */
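
// Shared SIMD helpers for the hashcat OpenCL kernels. Each VECT_SIZE branch
// below defines the same interface (MATCHES_ONE_*/MATCHES_NONE_*,
// COMPARE_S_SIMD, COMPARE_M_SIMD, the gather helpers and packv/unpackv), so
// kernel code written against the u32x type compiles unchanged for scalar
// and vector data paths.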

// vliw1

#if VECT_SIZE == 1

#define MATCHES_ONE_VV(a,b) ((a) == (b))
#define MATCHES_ONE_VS(a,b) ((a) == (b))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0) == search[0]) && ((h1) == search[1]) && ((h2) == search[2]) && ((h3) == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0, h1, h2, h3 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}

#endif
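
/**
 * Usage sketch (illustrative only; r0..r3 and the loop shape are
 * hypothetical): a fast-hash kernel computes four u32x digest words per
 * inner-loop step and hands them to the comparison macro that matches the
 * kernel flavor.
 *
 *   for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
 *   {
 *     u32x r0, r1, r2, r3;             // digest words of VECT_SIZE candidates
 *
 *     ...                              // hash computation
 *
 *     COMPARE_M_SIMD (r0, r1, r2, r3); // multi-hash: bitmap filter + lookup
 *     // COMPARE_S_SIMD (r0, r1, r2, r3); // single-hash variant
 *   }
 */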

// vliw2

#define vector_accessible(p,c,e) (((p) + (e)) < (c))
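
// With VECT_SIZE > 1 the last loop iteration may carry fewer than VECT_SIZE
// live candidates. vector_accessible (p, c, e) is true while element e of the
// vector starting at position p still lies below the candidate count c; the
// COMPARE_* macros below use it to suppress matches reported by dead lanes.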

#if VECT_SIZE == 2

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b)   ) || ((a).s1 == (b)   ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}

#endif

// vliw4

#if VECT_SIZE == 4

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b)   ) || ((a).s1 == (b)   ) || ((a).s2 == (b)   ) || ((a).s3 == (b)   ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp2, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp3, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}

#endif

// vliw8

#if VECT_SIZE == 8

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3) || ((a).s4 == (b).s4) || ((a).s5 == (b).s5) || ((a).s6 == (b).s6) || ((a).s7 == (b).s7))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b)   ) || ((a).s1 == (b)   ) || ((a).s2 == (b)   ) || ((a).s3 == (b)   ) || ((a).s4 == (b)   ) || ((a).s5 == (b)   ) || ((a).s6 == (b)   ) || ((a).s7 == (b)   ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp0[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp1[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp2[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp3[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  const u32 digest_tp4[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
  const u32 digest_tp5[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
  const u32 digest_tp6[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
  const u32 digest_tp7[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
  \
  if (check (digest_tp0, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp1, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp2, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp3, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp4, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp4, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp5, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp5, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp6, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp6, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp7, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp7, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}

#endif

// vliw16

#if VECT_SIZE == 16

#define MATCHES_ONE_VV(a,b) (((a).s0 == (b).s0) || ((a).s1 == (b).s1) || ((a).s2 == (b).s2) || ((a).s3 == (b).s3) || ((a).s4 == (b).s4) || ((a).s5 == (b).s5) || ((a).s6 == (b).s6) || ((a).s7 == (b).s7) || ((a).s8 == (b).s8) || ((a).s9 == (b).s9) || ((a).sa == (b).sa) || ((a).sb == (b).sb) || ((a).sc == (b).sc) || ((a).sd == (b).sd) || ((a).se == (b).se) || ((a).sf == (b).sf))
#define MATCHES_ONE_VS(a,b) (((a).s0 == (b)   ) || ((a).s1 == (b)   ) || ((a).s2 == (b)   ) || ((a).s3 == (b)   ) || ((a).s4 == (b)   ) || ((a).s5 == (b)   ) || ((a).s6 == (b)   ) || ((a).s7 == (b)   ) || ((a).s8 == (b)   ) || ((a).s9 == (b)   ) || ((a).sa == (b)   ) || ((a).sb == (b)   ) || ((a).sc == (b)   ) || ((a).sd == (b)   ) || ((a).se == (b)   ) || ((a).sf == (b)   ))

#define COMPARE_S_SIMD(h0,h1,h2,h3) \
{ \
  if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s8 == search[0]) && ((h1).s8 == search[1]) && ((h2).s8 == search[2]) && ((h3).s8 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 8) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 8); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).s9 == search[0]) && ((h1).s9 == search[1]) && ((h2).s9 == search[2]) && ((h3).s9 == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 9) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 9); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sa == search[0]) && ((h1).sa == search[1]) && ((h2).sa == search[2]) && ((h3).sa == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 10) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 10); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sb == search[0]) && ((h1).sb == search[1]) && ((h2).sb == search[2]) && ((h3).sb == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 11) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 11); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sc == search[0]) && ((h1).sc == search[1]) && ((h2).sc == search[2]) && ((h3).sc == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 12) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 12); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sd == search[0]) && ((h1).sd == search[1]) && ((h2).sd == search[2]) && ((h3).sd == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 13) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 13); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).se == search[0]) && ((h1).se == search[1]) && ((h2).se == search[2]) && ((h3).se == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 14) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 14); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
  \
  if (((h0).sf == search[0]) && ((h1).sf == search[1]) && ((h2).sf == search[2]) && ((h3).sf == search[3])) \
  { \
    const u32 final_hash_pos = digests_offset + 0; \
    \
    if (vector_accessible (il_pos, il_cnt, 15) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
    { \
      mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 15); \
      \
      d_return_buf[lid] = 1; \
    } \
  } \
}

#define COMPARE_M_SIMD(h0,h1,h2,h3) \
{ \
  const u32 digest_tp00[4] = { h0.s0, h1.s0, h2.s0, h3.s0 }; \
  const u32 digest_tp01[4] = { h0.s1, h1.s1, h2.s1, h3.s1 }; \
  const u32 digest_tp02[4] = { h0.s2, h1.s2, h2.s2, h3.s2 }; \
  const u32 digest_tp03[4] = { h0.s3, h1.s3, h2.s3, h3.s3 }; \
  const u32 digest_tp04[4] = { h0.s4, h1.s4, h2.s4, h3.s4 }; \
  const u32 digest_tp05[4] = { h0.s5, h1.s5, h2.s5, h3.s5 }; \
  const u32 digest_tp06[4] = { h0.s6, h1.s6, h2.s6, h3.s6 }; \
  const u32 digest_tp07[4] = { h0.s7, h1.s7, h2.s7, h3.s7 }; \
  const u32 digest_tp08[4] = { h0.s8, h1.s8, h2.s8, h3.s8 }; \
  const u32 digest_tp09[4] = { h0.s9, h1.s9, h2.s9, h3.s9 }; \
  const u32 digest_tp10[4] = { h0.sa, h1.sa, h2.sa, h3.sa }; \
  const u32 digest_tp11[4] = { h0.sb, h1.sb, h2.sb, h3.sb }; \
  const u32 digest_tp12[4] = { h0.sc, h1.sc, h2.sc, h3.sc }; \
  const u32 digest_tp13[4] = { h0.sd, h1.sd, h2.sd, h3.sd }; \
  const u32 digest_tp14[4] = { h0.se, h1.se, h2.se, h3.se }; \
  const u32 digest_tp15[4] = { h0.sf, h1.sf, h2.sf, h3.sf }; \
  \
  if (check (digest_tp00, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp00, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 0) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 0); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp01, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp01, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 1) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 1); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp02, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp02, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 2) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 2); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp03, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp03, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 3) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 3); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp04, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp04, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 4) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 4); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp05, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp05, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 5) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 5); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp06, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp06, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 6) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 6); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp07, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp07, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 7) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 7); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp08, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp08, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 8) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 8); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp09, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp09, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 9) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 9); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp10, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp10, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 10) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 10); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp11, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp11, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 11) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 11); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp12, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp12, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 12) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 12); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp13, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp13, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 13) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 13); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp14, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp14, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 14) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 14); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
  \
  if (check (digest_tp15, \
             bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, \
             bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, \
             bitmap_mask, \
             bitmap_shift1, \
             bitmap_shift2)) \
  { \
    int hash_pos = find_hash (digest_tp15, digests_cnt, &digests_buf[digests_offset]); \
    \
    if (hash_pos != -1) \
    { \
      const u32 final_hash_pos = digests_offset + hash_pos; \
      \
      if (vector_accessible (il_pos, il_cnt, 15) && (atomic_add (&hashes_shown[final_hash_pos], 1) == 0)) \
      { \
        mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + 15); \
        \
        d_return_buf[lid] = 1; \
      } \
    } \
  } \
}

#endif

#define MATCHES_NONE_VV(a,b) !(MATCHES_ONE_VV ((a), (b)))
#define MATCHES_NONE_VS(a,b) !(MATCHES_ONE_VS ((a), (b)))
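
/**
 * Usage sketch (hypothetical register names): MATCHES_NONE_VS() is the cheap
 * early-reject companion to the COMPARE_* macros. A kernel can skip further
 * work for a whole vector when no lane matches the first search word:
 *
 *   if (MATCHES_NONE_VS (r0, search[0])) continue;
 */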

// attack-mode 0

static inline u32x ix_create_bft (__global bf_t *bfs_buf, const u32 il_pos)
{
  #if VECT_SIZE == 1
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i);
  #elif VECT_SIZE == 2
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
  #elif VECT_SIZE == 4
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
  #elif VECT_SIZE == 8
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
  #elif VECT_SIZE == 16
  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i);
  #endif

  return ix;
}
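
/**
 * Usage sketch for attack-mode 0 (illustrative; w0l/w0r follow the naming
 * conventions of the kernels that include this file): gather one brute-force
 * word per lane and merge it with the loop-invariant left part of the
 * password block.
 *
 *   for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
 *   {
 *     const u32x w0r = ix_create_bft (bfs_buf, il_pos);
 *
 *     const u32x w0 = w0l | w0r;
 *     ...
 *   }
 */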

// attack-mode 1

static inline u32x pwlenx_create_combt (__global comb_t *combs_buf, const u32 il_pos)
{
  #if VECT_SIZE == 1
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len);
  #elif VECT_SIZE == 2
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len);
  #elif VECT_SIZE == 4
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len);
  #elif VECT_SIZE == 8
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len);
  #elif VECT_SIZE == 16
  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len);
  #endif

  return pw_lenx;
}

static inline u32x ix_create_combt (__global comb_t *combs_buf, const u32 il_pos, const int idx)
{
  #if VECT_SIZE == 1
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx]);
  #elif VECT_SIZE == 2
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]);
  #elif VECT_SIZE == 4
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]);
  #elif VECT_SIZE == 8
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]);
  #elif VECT_SIZE == 16
  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]);
  #endif

  return ix;
}
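
/**
 * Usage sketch for attack-mode 1 (illustrative only): per step, fetch the
 * right-hand word length and the word data for VECT_SIZE combinator entries.
 *
 *   for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
 *   {
 *     const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
 *
 *     u32x wordr0[4];
 *
 *     wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
 *     wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
 *     wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
 *     wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
 *     ...
 *   }
 */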

#if VECT_SIZE == 1
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 1) + 0].var[(idx)])
#elif VECT_SIZE == 2
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 2) + 0].var[(idx)], (arr)[((gid) * 2) + 1].var[(idx)])
#elif VECT_SIZE == 4
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 4) + 0].var[(idx)], (arr)[((gid) * 4) + 1].var[(idx)], (arr)[((gid) * 4) + 2].var[(idx)], (arr)[((gid) * 4) + 3].var[(idx)])
#elif VECT_SIZE == 8
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)])
#elif VECT_SIZE == 16
#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
#endif

#if VECT_SIZE == 1
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 1) + 0].var[(idx)] = val;
#elif VECT_SIZE == 2
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 2) + 0].var[(idx)] = val.s0; (arr)[((gid) * 2) + 1].var[(idx)] = val.s1;
#elif VECT_SIZE == 4
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 4) + 0].var[(idx)] = val.s0; (arr)[((gid) * 4) + 1].var[(idx)] = val.s1; (arr)[((gid) * 4) + 2].var[(idx)] = val.s2; (arr)[((gid) * 4) + 3].var[(idx)] = val.s3;
#elif VECT_SIZE == 8
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 8) + 0].var[(idx)] = val.s0; (arr)[((gid) * 8) + 1].var[(idx)] = val.s1; (arr)[((gid) * 8) + 2].var[(idx)] = val.s2; (arr)[((gid) * 8) + 3].var[(idx)] = val.s3; (arr)[((gid) * 8) + 4].var[(idx)] = val.s4; (arr)[((gid) * 8) + 5].var[(idx)] = val.s5; (arr)[((gid) * 8) + 6].var[(idx)] = val.s6; (arr)[((gid) * 8) + 7].var[(idx)] = val.s7;
#elif VECT_SIZE == 16
#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] = val.s0; (arr)[((gid) * 16) + 1].var[(idx)] = val.s1; (arr)[((gid) * 16) + 2].var[(idx)] = val.s2; (arr)[((gid) * 16) + 3].var[(idx)] = val.s3; (arr)[((gid) * 16) + 4].var[(idx)] = val.s4; (arr)[((gid) * 16) + 5].var[(idx)] = val.s5; (arr)[((gid) * 16) + 6].var[(idx)] = val.s6; (arr)[((gid) * 16) + 7].var[(idx)] = val.s7; (arr)[((gid) * 16) + 8].var[(idx)] = val.s8; (arr)[((gid) * 16) + 9].var[(idx)] = val.s9; (arr)[((gid) * 16) + 10].var[(idx)] = val.sa; (arr)[((gid) * 16) + 11].var[(idx)] = val.sb; (arr)[((gid) * 16) + 12].var[(idx)] = val.sc; (arr)[((gid) * 16) + 13].var[(idx)] = val.sd; (arr)[((gid) * 16) + 14].var[(idx)] = val.se; (arr)[((gid) * 16) + 15].var[(idx)] = val.sf;
#endif
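
/**
 * Usage sketch (the tmps buffer and its dgst member are hypothetical names):
 * packv/unpackv transpose between the array-of-structures layout in global
 * memory and one u32x register per word index, touching VECT_SIZE structure
 * entries per call. A slow-hash loop kernel would read, iterate and write
 * back roughly like this:
 *
 *   u32x digest[5];
 *
 *   for (int i = 0; i < 5; i++) digest[i] = packv (tmps, dgst, gid, i);
 *
 *   ...                                // iteration work on digest[]
 *
 *   for (int i = 0; i < 5; i++) unpackv (tmps, dgst, gid, i, digest[i]);
 */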