Initial commit
[hashcat.git] / amd / rp_amd.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 static u32x generate_cmask (u32x buf)
7 {
8 const u32x rmask = ((buf & 0x40404040) >> 1)
9 & ~((buf & 0x80808080) >> 2);
10
11 const u32x hmask = (buf & 0x1f1f1f1f) + 0x05050505;
12 const u32x lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
13
14 return rmask & ~hmask & lmask;
15 }
16
17 static void truncate_right (u32x w0[4], u32x w1[4], const u32 len)
18 {
19 const u32 tmp = (1 << ((len % 4) * 8)) - 1;
20
21 switch (len / 4)
22 {
23 case 0: w0[0] &= tmp;
24 w0[1] = 0;
25 w0[2] = 0;
26 w0[3] = 0;
27 w1[0] = 0;
28 w1[1] = 0;
29 w1[2] = 0;
30 w1[3] = 0;
31 break;
32 case 1: w0[1] &= tmp;
33 w0[2] = 0;
34 w0[3] = 0;
35 w1[0] = 0;
36 w1[1] = 0;
37 w1[2] = 0;
38 w1[3] = 0;
39 break;
40 case 2: w0[2] &= tmp;
41 w0[3] = 0;
42 w1[0] = 0;
43 w1[1] = 0;
44 w1[2] = 0;
45 w1[3] = 0;
46 break;
47 case 3: w0[3] &= tmp;
48 w1[0] = 0;
49 w1[1] = 0;
50 w1[2] = 0;
51 w1[3] = 0;
52 break;
53 case 4: w1[0] &= tmp;
54 w1[1] = 0;
55 w1[2] = 0;
56 w1[3] = 0;
57 break;
58 case 5: w1[1] &= tmp;
59 w1[2] = 0;
60 w1[3] = 0;
61 break;
62 case 6: w1[2] &= tmp;
63 w1[3] = 0;
64 break;
65 case 7: w1[3] &= tmp;
66 break;
67 }
68 }
69
70 static void truncate_left (u32x w0[4], u32x w1[4], const u32 len)
71 {
72 const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
73
74 switch (len / 4)
75 {
76 case 0: w0[0] &= tmp;
77 break;
78 case 1: w0[0] = 0;
79 w0[1] &= tmp;
80 break;
81 case 2: w0[0] = 0;
82 w0[1] = 0;
83 w0[2] &= tmp;
84 break;
85 case 3: w0[0] = 0;
86 w0[1] = 0;
87 w0[2] = 0;
88 w0[3] &= tmp;
89 break;
90 case 4: w0[0] = 0;
91 w0[1] = 0;
92 w0[2] = 0;
93 w0[3] = 0;
94 w1[0] &= tmp;
95 break;
96 case 5: w0[0] = 0;
97 w0[1] = 0;
98 w0[2] = 0;
99 w0[3] = 0;
100 w1[0] = 0;
101 w1[1] &= tmp;
102 break;
103 case 6: w0[0] = 0;
104 w0[1] = 0;
105 w0[2] = 0;
106 w0[3] = 0;
107 w1[0] = 0;
108 w1[1] = 0;
109 w1[2] &= tmp;
110 break;
111 case 7: w0[0] = 0;
112 w0[1] = 0;
113 w0[2] = 0;
114 w0[3] = 0;
115 w1[0] = 0;
116 w1[1] = 0;
117 w1[2] = 0;
118 w1[3] &= tmp;
119 break;
120 }
121 }
122
123 static void lshift_block (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4])
124 {
125 /* cuda
126 out0[0] = in0[0] >> 8 | in0[1] << 24;
127 out0[1] = in0[1] >> 8 | in0[2] << 24;
128 out0[2] = in0[2] >> 8 | in0[3] << 24;
129 out0[3] = in0[3] >> 8 | in1[0] << 24;
130 out1[0] = in1[0] >> 8 | in1[1] << 24;
131 out1[1] = in1[1] >> 8 | in1[2] << 24;
132 out1[2] = in1[2] >> 8 | in1[3] << 24;
133 out1[3] = in1[3] >> 8;
134 */
135
136 out0[0] = amd_bytealign (in0[1], in0[0], 1);
137 out0[1] = amd_bytealign (in0[2], in0[1], 1);
138 out0[2] = amd_bytealign (in0[3], in0[2], 1);
139 out0[3] = amd_bytealign (in1[0], in0[3], 1);
140 out1[0] = amd_bytealign (in1[1], in1[0], 1);
141 out1[1] = amd_bytealign (in1[2], in1[1], 1);
142 out1[2] = amd_bytealign (in1[3], in1[2], 1);
143 out1[3] = amd_bytealign ( 0, in1[3], 1);
144 }
145
146 static void rshift_block (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4])
147 {
148 /* cuda
149 out1[3] = in1[3] << 8 | in1[2] >> 24;
150 out1[2] = in1[2] << 8 | in1[1] >> 24;
151 out1[1] = in1[1] << 8 | in1[0] >> 24;
152 out1[0] = in1[0] << 8 | in0[3] >> 24;
153 out0[3] = in0[3] << 8 | in0[2] >> 24;
154 out0[2] = in0[2] << 8 | in0[1] >> 24;
155 out0[1] = in0[1] << 8 | in0[0] >> 24;
156 out0[0] = in0[0] << 8;
157 */
158
159 out1[3] = amd_bytealign (in1[3], in1[2], 3);
160 out1[2] = amd_bytealign (in1[2], in1[1], 3);
161 out1[1] = amd_bytealign (in1[1], in1[0], 3);
162 out1[0] = amd_bytealign (in1[0], in0[3], 3);
163 out0[3] = amd_bytealign (in0[3], in0[2], 3);
164 out0[2] = amd_bytealign (in0[2], in0[1], 3);
165 out0[1] = amd_bytealign (in0[1], in0[0], 3);
166 out0[0] = amd_bytealign (in0[0], 0, 3);
167 }
168
169 static void lshift_block_N (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4], const u32 num)
170 {
171 switch (num)
172 {
173 case 0: out0[0] = in0[0];
174 out0[1] = in0[1];
175 out0[2] = in0[2];
176 out0[3] = in0[3];
177 out1[0] = in1[0];
178 out1[1] = in1[1];
179 out1[2] = in1[2];
180 out1[3] = in1[3];
181 break;
182 case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1);
183 out0[1] = amd_bytealign (in0[2], in0[1], 1);
184 out0[2] = amd_bytealign (in0[3], in0[2], 1);
185 out0[3] = amd_bytealign (in1[0], in0[3], 1);
186 out1[0] = amd_bytealign (in1[1], in1[0], 1);
187 out1[1] = amd_bytealign (in1[2], in1[1], 1);
188 out1[2] = amd_bytealign (in1[3], in1[2], 1);
189 out1[3] = amd_bytealign ( 0, in1[3], 1);
190 break;
191 case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2);
192 out0[1] = amd_bytealign (in0[2], in0[1], 2);
193 out0[2] = amd_bytealign (in0[3], in0[2], 2);
194 out0[3] = amd_bytealign (in1[0], in0[3], 2);
195 out1[0] = amd_bytealign (in1[1], in1[0], 2);
196 out1[1] = amd_bytealign (in1[2], in1[1], 2);
197 out1[2] = amd_bytealign (in1[3], in1[2], 2);
198 out1[3] = amd_bytealign ( 0, in1[3], 2);
199 break;
200 case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3);
201 out0[1] = amd_bytealign (in0[2], in0[1], 3);
202 out0[2] = amd_bytealign (in0[3], in0[2], 3);
203 out0[3] = amd_bytealign (in1[0], in0[3], 3);
204 out1[0] = amd_bytealign (in1[1], in1[0], 3);
205 out1[1] = amd_bytealign (in1[2], in1[1], 3);
206 out1[2] = amd_bytealign (in1[3], in1[2], 3);
207 out1[3] = amd_bytealign ( 0, in1[3], 3);
208 break;
209 case 4: out0[0] = in0[1];
210 out0[1] = in0[2];
211 out0[2] = in0[3];
212 out0[3] = in1[0];
213 out1[0] = in1[1];
214 out1[1] = in1[2];
215 out1[2] = in1[3];
216 out1[3] = 0;
217 break;
218 case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1);
219 out0[1] = amd_bytealign (in0[3], in0[2], 1);
220 out0[2] = amd_bytealign (in1[0], in0[3], 1);
221 out0[3] = amd_bytealign (in1[1], in1[0], 1);
222 out1[0] = amd_bytealign (in1[2], in1[1], 1);
223 out1[1] = amd_bytealign (in1[3], in1[2], 1);
224 out1[2] = amd_bytealign ( 0, in1[3], 1);
225 out1[3] = 0;
226 break;
227 case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2);
228 out0[1] = amd_bytealign (in0[3], in0[2], 2);
229 out0[2] = amd_bytealign (in1[0], in0[3], 2);
230 out0[3] = amd_bytealign (in1[1], in1[0], 2);
231 out1[0] = amd_bytealign (in1[2], in1[1], 2);
232 out1[1] = amd_bytealign (in1[3], in1[2], 2);
233 out1[2] = amd_bytealign ( 0, in1[3], 2);
234 out1[3] = 0;
235 break;
236 case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3);
237 out0[1] = amd_bytealign (in0[3], in0[2], 3);
238 out0[2] = amd_bytealign (in1[0], in0[3], 3);
239 out0[3] = amd_bytealign (in1[1], in1[0], 3);
240 out1[0] = amd_bytealign (in1[2], in1[1], 3);
241 out1[1] = amd_bytealign (in1[3], in1[2], 3);
242 out1[2] = amd_bytealign ( 0, in1[3], 3);
243 out1[3] = 0;
244 break;
245 case 8: out0[0] = in0[2];
246 out0[1] = in0[3];
247 out0[2] = in1[0];
248 out0[3] = in1[1];
249 out1[0] = in1[2];
250 out1[1] = in1[3];
251 out1[2] = 0;
252 out1[3] = 0;
253 break;
254 case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1);
255 out0[1] = amd_bytealign (in1[0], in0[3], 1);
256 out0[2] = amd_bytealign (in1[1], in1[0], 1);
257 out0[3] = amd_bytealign (in1[2], in1[1], 1);
258 out1[0] = amd_bytealign (in1[3], in1[2], 1);
259 out1[1] = amd_bytealign ( 0, in1[3], 1);
260 out1[2] = 0;
261 out1[3] = 0;
262 break;
263 case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2);
264 out0[1] = amd_bytealign (in1[0], in0[3], 2);
265 out0[2] = amd_bytealign (in1[1], in1[0], 2);
266 out0[3] = amd_bytealign (in1[2], in1[1], 2);
267 out1[0] = amd_bytealign (in1[3], in1[2], 2);
268 out1[1] = amd_bytealign ( 0, in1[3], 2);
269 out1[2] = 0;
270 out1[3] = 0;
271 break;
272 case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3);
273 out0[1] = amd_bytealign (in1[0], in0[3], 3);
274 out0[2] = amd_bytealign (in1[1], in1[0], 3);
275 out0[3] = amd_bytealign (in1[2], in1[1], 3);
276 out1[0] = amd_bytealign (in1[3], in1[2], 3);
277 out1[1] = amd_bytealign ( 0, in1[3], 3);
278 out1[2] = 0;
279 out1[3] = 0;
280 break;
281 case 12: out0[0] = in0[3];
282 out0[1] = in1[0];
283 out0[2] = in1[1];
284 out0[3] = in1[2];
285 out1[0] = in1[3];
286 out1[1] = 0;
287 out1[2] = 0;
288 out1[3] = 0;
289 break;
290 case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1);
291 out0[1] = amd_bytealign (in1[1], in1[0], 1);
292 out0[2] = amd_bytealign (in1[2], in1[1], 1);
293 out0[3] = amd_bytealign (in1[3], in1[2], 1);
294 out1[0] = amd_bytealign ( 0, in1[3], 1);
295 out1[1] = 0;
296 out1[2] = 0;
297 out1[3] = 0;
298 break;
299 case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2);
300 out0[1] = amd_bytealign (in1[1], in1[0], 2);
301 out0[2] = amd_bytealign (in1[2], in1[1], 2);
302 out0[3] = amd_bytealign (in1[3], in1[2], 2);
303 out1[0] = amd_bytealign ( 0, in1[3], 2);
304 out1[1] = 0;
305 out1[2] = 0;
306 out1[3] = 0;
307 break;
308 case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3);
309 out0[1] = amd_bytealign (in1[1], in1[0], 3);
310 out0[2] = amd_bytealign (in1[2], in1[1], 3);
311 out0[3] = amd_bytealign (in1[3], in1[2], 3);
312 out1[0] = amd_bytealign ( 0, in1[3], 3);
313 out1[1] = 0;
314 out1[2] = 0;
315 out1[3] = 0;
316 break;
317 case 16: out0[0] = in1[0];
318 out0[1] = in1[1];
319 out0[2] = in1[2];
320 out0[3] = in1[3];
321 out1[0] = 0;
322 out1[1] = 0;
323 out1[2] = 0;
324 out1[3] = 0;
325 break;
326 case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1);
327 out0[1] = amd_bytealign (in1[2], in1[1], 1);
328 out0[2] = amd_bytealign (in1[3], in1[2], 1);
329 out0[3] = amd_bytealign ( 0, in1[3], 1);
330 out1[0] = 0;
331 out1[1] = 0;
332 out1[2] = 0;
333 out1[3] = 0;
334 break;
335 case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2);
336 out0[1] = amd_bytealign (in1[2], in1[1], 2);
337 out0[2] = amd_bytealign (in1[3], in1[2], 2);
338 out0[3] = amd_bytealign ( 0, in1[3], 2);
339 out1[0] = 0;
340 out1[1] = 0;
341 out1[2] = 0;
342 out1[3] = 0;
343 break;
344 case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3);
345 out0[1] = amd_bytealign (in1[2], in1[1], 3);
346 out0[2] = amd_bytealign (in1[3], in1[2], 3);
347 out0[3] = amd_bytealign ( 0, in1[3], 3);
348 out1[0] = 0;
349 out1[1] = 0;
350 out1[2] = 0;
351 out1[3] = 0;
352 break;
353 case 20: out0[0] = in1[1];
354 out0[1] = in1[2];
355 out0[2] = in1[3];
356 out0[3] = 0;
357 out1[0] = 0;
358 out1[1] = 0;
359 out1[2] = 0;
360 out1[3] = 0;
361 break;
362 case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1);
363 out0[1] = amd_bytealign (in1[3], in1[2], 1);
364 out0[2] = amd_bytealign ( 0, in1[3], 1);
365 out0[3] = 0;
366 out1[0] = 0;
367 out1[1] = 0;
368 out1[2] = 0;
369 out1[3] = 0;
370 break;
371 case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2);
372 out0[1] = amd_bytealign (in1[3], in1[2], 2);
373 out0[2] = amd_bytealign ( 0, in1[3], 2);
374 out0[3] = 0;
375 out1[0] = 0;
376 out1[1] = 0;
377 out1[2] = 0;
378 out1[3] = 0;
379 break;
380 case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3);
381 out0[1] = amd_bytealign (in1[3], in1[2], 3);
382 out0[2] = amd_bytealign ( 0, in1[3], 3);
383 out0[3] = 0;
384 out1[0] = 0;
385 out1[1] = 0;
386 out1[2] = 0;
387 out1[3] = 0;
388 break;
389 case 24: out0[0] = in1[2];
390 out0[1] = in1[3];
391 out0[2] = 0;
392 out0[3] = 0;
393 out1[0] = 0;
394 out1[1] = 0;
395 out1[2] = 0;
396 out1[3] = 0;
397 break;
398 case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1);
399 out0[1] = amd_bytealign ( 0, in1[3], 1);
400 out0[2] = 0;
401 out0[3] = 0;
402 out1[0] = 0;
403 out1[1] = 0;
404 out1[2] = 0;
405 out1[3] = 0;
406 break;
407 case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2);
408 out0[1] = amd_bytealign ( 0, in1[3], 2);
409 out0[2] = 0;
410 out0[3] = 0;
411 out1[0] = 0;
412 out1[1] = 0;
413 out1[2] = 0;
414 out1[3] = 0;
415 break;
416 case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3);
417 out0[1] = amd_bytealign ( 0, in1[3], 3);
418 out0[2] = 0;
419 out0[3] = 0;
420 out1[0] = 0;
421 out1[1] = 0;
422 out1[2] = 0;
423 out1[3] = 0;
424 break;
425 case 28: out0[0] = in1[3];
426 out0[1] = 0;
427 out0[2] = 0;
428 out0[3] = 0;
429 out1[0] = 0;
430 out1[1] = 0;
431 out1[2] = 0;
432 out1[3] = 0;
433 break;
434 case 29: out0[0] = amd_bytealign ( 0, in1[3], 1);
435 out0[1] = 0;
436 out0[2] = 0;
437 out0[3] = 0;
438 out1[0] = 0;
439 out1[1] = 0;
440 out1[2] = 0;
441 out1[3] = 0;
442 break;
443 case 30: out0[0] = amd_bytealign ( 0, in1[3], 2);
444 out0[1] = 0;
445 out0[2] = 0;
446 out0[3] = 0;
447 out1[0] = 0;
448 out1[1] = 0;
449 out1[2] = 0;
450 out1[3] = 0;
451 break;
452 case 31: out0[0] = amd_bytealign ( 0, in1[3], 3);
453 out0[1] = 0;
454 out0[2] = 0;
455 out0[3] = 0;
456 out1[0] = 0;
457 out1[1] = 0;
458 out1[2] = 0;
459 out1[3] = 0;
460 break;
461 }
462 }
463
464 static void rshift_block_N (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4], const u32 num)
465 {
466 switch (num)
467 {
468 case 0: out1[3] = in1[3];
469 out1[2] = in1[2];
470 out1[1] = in1[1];
471 out1[0] = in1[0];
472 out0[3] = in0[3];
473 out0[2] = in0[2];
474 out0[1] = in0[1];
475 out0[0] = in0[0];
476 break;
477 case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3);
478 out1[2] = amd_bytealign (in1[2], in1[1], 3);
479 out1[1] = amd_bytealign (in1[1], in1[0], 3);
480 out1[0] = amd_bytealign (in1[0], in0[3], 3);
481 out0[3] = amd_bytealign (in0[3], in0[2], 3);
482 out0[2] = amd_bytealign (in0[2], in0[1], 3);
483 out0[1] = amd_bytealign (in0[1], in0[0], 3);
484 out0[0] = amd_bytealign (in0[0], 0, 3);
485 break;
486 case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2);
487 out1[2] = amd_bytealign (in1[2], in1[1], 2);
488 out1[1] = amd_bytealign (in1[1], in1[0], 2);
489 out1[0] = amd_bytealign (in1[0], in0[3], 2);
490 out0[3] = amd_bytealign (in0[3], in0[2], 2);
491 out0[2] = amd_bytealign (in0[2], in0[1], 2);
492 out0[1] = amd_bytealign (in0[1], in0[0], 2);
493 out0[0] = amd_bytealign (in0[0], 0, 2);
494 break;
495 case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1);
496 out1[2] = amd_bytealign (in1[2], in1[1], 1);
497 out1[1] = amd_bytealign (in1[1], in1[0], 1);
498 out1[0] = amd_bytealign (in1[0], in0[3], 1);
499 out0[3] = amd_bytealign (in0[3], in0[2], 1);
500 out0[2] = amd_bytealign (in0[2], in0[1], 1);
501 out0[1] = amd_bytealign (in0[1], in0[0], 1);
502 out0[0] = amd_bytealign (in0[0], 0, 1);
503 break;
504 case 4: out1[3] = in1[2];
505 out1[2] = in1[1];
506 out1[1] = in1[0];
507 out1[0] = in0[3];
508 out0[3] = in0[2];
509 out0[2] = in0[1];
510 out0[1] = in0[0];
511 out0[0] = 0;
512 break;
513 case 5: out1[3] = amd_bytealign (in1[2], in1[1], 3);
514 out1[2] = amd_bytealign (in1[1], in1[0], 3);
515 out1[1] = amd_bytealign (in1[0], in0[3], 3);
516 out1[0] = amd_bytealign (in0[3], in0[2], 3);
517 out0[3] = amd_bytealign (in0[2], in0[1], 3);
518 out0[2] = amd_bytealign (in0[1], in0[0], 3);
519 out0[1] = amd_bytealign (in0[0], 0, 3);
520 out0[0] = 0;
521 break;
522 case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2);
523 out1[2] = amd_bytealign (in1[1], in1[0], 2);
524 out1[1] = amd_bytealign (in1[0], in0[3], 2);
525 out1[0] = amd_bytealign (in0[3], in0[2], 2);
526 out0[3] = amd_bytealign (in0[2], in0[1], 2);
527 out0[2] = amd_bytealign (in0[1], in0[0], 2);
528 out0[1] = amd_bytealign (in0[0], 0, 2);
529 out0[0] = 0;
530 break;
531 case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1);
532 out1[2] = amd_bytealign (in1[1], in1[0], 1);
533 out1[1] = amd_bytealign (in1[0], in0[3], 1);
534 out1[0] = amd_bytealign (in0[3], in0[2], 1);
535 out0[3] = amd_bytealign (in0[2], in0[1], 1);
536 out0[2] = amd_bytealign (in0[1], in0[0], 1);
537 out0[1] = amd_bytealign (in0[0], 0, 1);
538 out0[0] = 0;
539 break;
540 case 8: out1[3] = in1[1];
541 out1[2] = in1[0];
542 out1[1] = in0[3];
543 out1[0] = in0[2];
544 out0[3] = in0[1];
545 out0[2] = in0[0];
546 out0[1] = 0;
547 out0[0] = 0;
548 break;
549 case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3);
550 out1[2] = amd_bytealign (in1[0], in0[3], 3);
551 out1[1] = amd_bytealign (in0[3], in0[2], 3);
552 out1[0] = amd_bytealign (in0[2], in0[1], 3);
553 out0[3] = amd_bytealign (in0[1], in0[0], 3);
554 out0[2] = amd_bytealign (in0[0], 0, 3);
555 out0[1] = 0;
556 out0[0] = 0;
557 break;
558 case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2);
559 out1[2] = amd_bytealign (in1[0], in0[3], 2);
560 out1[1] = amd_bytealign (in0[3], in0[2], 2);
561 out1[0] = amd_bytealign (in0[2], in0[1], 2);
562 out0[3] = amd_bytealign (in0[1], in0[0], 2);
563 out0[2] = amd_bytealign (in0[0], 0, 2);
564 out0[1] = 0;
565 out0[0] = 0;
566 break;
567 case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1);
568 out1[2] = amd_bytealign (in1[0], in0[3], 1);
569 out1[1] = amd_bytealign (in0[3], in0[2], 1);
570 out1[0] = amd_bytealign (in0[2], in0[1], 1);
571 out0[3] = amd_bytealign (in0[1], in0[0], 1);
572 out0[2] = amd_bytealign (in0[0], 0, 1);
573 out0[1] = 0;
574 out0[0] = 0;
575 break;
576 case 12: out1[3] = in1[0];
577 out1[2] = in0[3];
578 out1[1] = in0[2];
579 out1[0] = in0[1];
580 out0[3] = in0[0];
581 out0[2] = 0;
582 out0[1] = 0;
583 out0[0] = 0;
584 break;
585 case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3);
586 out1[2] = amd_bytealign (in0[3], in0[2], 3);
587 out1[1] = amd_bytealign (in0[2], in0[1], 3);
588 out1[0] = amd_bytealign (in0[1], in0[0], 3);
589 out0[3] = amd_bytealign (in0[0], 0, 3);
590 out0[2] = 0;
591 out0[1] = 0;
592 out0[0] = 0;
593 break;
594 case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2);
595 out1[2] = amd_bytealign (in0[3], in0[2], 2);
596 out1[1] = amd_bytealign (in0[2], in0[1], 2);
597 out1[0] = amd_bytealign (in0[1], in0[0], 2);
598 out0[3] = amd_bytealign (in0[0], 0, 2);
599 out0[2] = 0;
600 out0[1] = 0;
601 out0[0] = 0;
602 break;
603 case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1);
604 out1[2] = amd_bytealign (in0[3], in0[2], 1);
605 out1[1] = amd_bytealign (in0[2], in0[1], 1);
606 out1[0] = amd_bytealign (in0[1], in0[0], 1);
607 out0[3] = amd_bytealign (in0[0], 0, 1);
608 out0[2] = 0;
609 out0[1] = 0;
610 out0[0] = 0;
611 break;
612 case 16: out1[3] = in0[3];
613 out1[2] = in0[2];
614 out1[1] = in0[1];
615 out1[0] = in0[0];
616 out0[3] = 0;
617 out0[2] = 0;
618 out0[1] = 0;
619 out0[0] = 0;
620 break;
621 case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3);
622 out1[2] = amd_bytealign (in0[2], in0[1], 3);
623 out1[1] = amd_bytealign (in0[1], in0[0], 3);
624 out1[0] = amd_bytealign (in0[0], 0, 3);
625 out0[3] = 0;
626 out0[2] = 0;
627 out0[1] = 0;
628 out0[0] = 0;
629 break;
630 case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2);
631 out1[2] = amd_bytealign (in0[2], in0[1], 2);
632 out1[1] = amd_bytealign (in0[1], in0[0], 2);
633 out1[0] = amd_bytealign (in0[0], 0, 2);
634 out0[3] = 0;
635 out0[2] = 0;
636 out0[1] = 0;
637 out0[0] = 0;
638 break;
639 case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1);
640 out1[2] = amd_bytealign (in0[2], in0[1], 1);
641 out1[1] = amd_bytealign (in0[1], in0[0], 1);
642 out1[0] = amd_bytealign (in0[0], 0, 1);
643 out0[3] = 0;
644 out0[2] = 0;
645 out0[1] = 0;
646 out0[0] = 0;
647 break;
648 case 20: out1[3] = in0[2];
649 out1[2] = in0[1];
650 out1[1] = in0[0];
651 out1[0] = 0;
652 out0[3] = 0;
653 out0[2] = 0;
654 out0[1] = 0;
655 out0[0] = 0;
656 break;
657 case 21: out1[3] = amd_bytealign (in0[2], in0[1], 3);
658 out1[2] = amd_bytealign (in0[1], in0[0], 3);
659 out1[1] = amd_bytealign (in0[0], 0, 3);
660 out1[0] = 0;
661 out0[3] = 0;
662 out0[2] = 0;
663 out0[1] = 0;
664 out0[0] = 0;
665 break;
666 case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2);
667 out1[2] = amd_bytealign (in0[1], in0[0], 2);
668 out1[1] = amd_bytealign (in0[0], 0, 2);
669 out1[0] = 0;
670 out0[3] = 0;
671 out0[2] = 0;
672 out0[1] = 0;
673 out0[0] = 0;
674 break;
675 case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1);
676 out1[2] = amd_bytealign (in0[1], in0[0], 1);
677 out1[1] = amd_bytealign (in0[0], 0, 1);
678 out1[0] = 0;
679 out0[3] = 0;
680 out0[2] = 0;
681 out0[1] = 0;
682 out0[0] = 0;
683 break;
684 case 24: out1[3] = in0[1];
685 out1[2] = in0[0];
686 out1[1] = 0;
687 out1[0] = 0;
688 out0[3] = 0;
689 out0[2] = 0;
690 out0[1] = 0;
691 out0[0] = 0;
692 break;
693 case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3);
694 out1[2] = amd_bytealign (in0[0], 0, 3);
695 out1[1] = 0;
696 out1[0] = 0;
697 out0[3] = 0;
698 out0[2] = 0;
699 out0[1] = 0;
700 out0[0] = 0;
701 break;
702 case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2);
703 out1[2] = amd_bytealign (in0[0], 0, 2);
704 out1[1] = 0;
705 out1[0] = 0;
706 out0[3] = 0;
707 out0[2] = 0;
708 out0[1] = 0;
709 out0[0] = 0;
710 break;
711 case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1);
712 out1[2] = amd_bytealign (in0[0], 0, 1);
713 out1[1] = 0;
714 out1[0] = 0;
715 out0[3] = 0;
716 out0[2] = 0;
717 out0[1] = 0;
718 out0[0] = 0;
719 break;
720 case 28: out1[3] = in0[0];
721 out1[2] = 0;
722 out1[1] = 0;
723 out1[0] = 0;
724 out0[3] = 0;
725 out0[2] = 0;
726 out0[1] = 0;
727 out0[0] = 0;
728 break;
729 case 29: out1[3] = amd_bytealign (in0[0], 0, 3);
730 out1[2] = 0;
731 out1[1] = 0;
732 out1[0] = 0;
733 out0[3] = 0;
734 out0[2] = 0;
735 out0[1] = 0;
736 out0[0] = 0;
737 break;
738 case 30: out1[3] = amd_bytealign (in0[0], 0, 2);
739 out1[2] = 0;
740 out1[1] = 0;
741 out1[0] = 0;
742 out0[3] = 0;
743 out0[2] = 0;
744 out0[1] = 0;
745 out0[0] = 0;
746 break;
747 case 31: out1[3] = amd_bytealign (in0[0], 0, 1);
748 out1[2] = 0;
749 out1[1] = 0;
750 out1[0] = 0;
751 out0[3] = 0;
752 out0[2] = 0;
753 out0[1] = 0;
754 out0[0] = 0;
755 break;
756 }
757 }
758
759 static void append_block1 (const u32 offset, u32x dst0[4], u32x dst1[4], const u32x src_r0)
760 {
761 u32x tmp[2];
762
763 switch (offset & 3)
764 {
765 case 0: tmp[0] = src_r0;
766 tmp[1] = 0;
767 break;
768 case 1: tmp[0] = src_r0 << 8;
769 tmp[1] = src_r0 >> 24;
770 break;
771 case 2: tmp[0] = src_r0 << 16;
772 tmp[1] = src_r0 >> 16;
773 break;
774 case 3: tmp[0] = src_r0 << 24;
775 tmp[1] = src_r0 >> 8;
776 break;
777 }
778
779 switch (offset / 4)
780 {
781 case 0: dst0[0] |= tmp[0];
782 dst0[1] = tmp[1];
783 break;
784 case 1: dst0[1] |= tmp[0];
785 dst0[2] = tmp[1];
786 break;
787 case 2: dst0[2] |= tmp[0];
788 dst0[3] = tmp[1];
789 break;
790 case 3: dst0[3] |= tmp[0];
791 dst1[0] = tmp[1];
792 break;
793 case 4: dst1[0] |= tmp[0];
794 dst1[1] = tmp[1];
795 break;
796 case 5: dst1[1] |= tmp[0];
797 dst1[2] = tmp[1];
798 break;
799 case 6: dst1[2] |= tmp[0];
800 dst1[3] = tmp[1];
801 break;
802 case 7: dst1[3] |= tmp[0];
803 break;
804 }
805 }
806
807 static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[4], const u32x src_l0[4], const u32x src_l1[4], const u32x src_r0[4], const u32x src_r1[4])
808 {
809 switch (offset)
810 {
811 case 0:
812 dst0[0] = src_r0[0];
813 dst0[1] = src_r0[1];
814 dst0[2] = src_r0[2];
815 dst0[3] = src_r0[3];
816 dst1[0] = src_r1[0];
817 dst1[1] = src_r1[1];
818 dst1[2] = src_r1[2];
819 dst1[3] = src_r1[3];
820 break;
821
822 case 1:
823 dst0[0] = src_l0[0]
824 | src_r0[0] << 8;
825 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
826 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
827 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
828 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3);
829 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 3);
830 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3);
831 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3);
832 break;
833
834 case 2:
835 dst0[0] = src_l0[0]
836 | src_r0[0] << 16;
837 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
838 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
839 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
840 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2);
841 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2);
842 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2);
843 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2);
844 break;
845
846 case 3:
847 dst0[0] = src_l0[0]
848 | src_r0[0] << 24;
849 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
850 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
851 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
852 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1);
853 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1);
854 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1);
855 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1);
856 break;
857
858 case 4:
859 dst0[1] = src_r0[0];
860 dst0[2] = src_r0[1];
861 dst0[3] = src_r0[2];
862 dst1[0] = src_r0[3];
863 dst1[1] = src_r1[0];
864 dst1[2] = src_r1[1];
865 dst1[3] = src_r1[2];
866 break;
867
868 case 5:
869 dst0[1] = src_l0[1]
870 | src_r0[0] << 8;
871 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
872 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
873 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3);
874 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3);
875 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3);
876 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3);
877 break;
878
879 case 6:
880 dst0[1] = src_l0[1]
881 | src_r0[0] << 16;
882 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
883 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
884 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2);
885 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2);
886 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2);
887 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2);
888 break;
889
890 case 7:
891 dst0[1] = src_l0[1]
892 | src_r0[0] << 24;
893 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
894 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
895 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1);
896 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1);
897 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1);
898 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1);
899 break;
900
901 case 8:
902 dst0[2] = src_r0[0];
903 dst0[3] = src_r0[1];
904 dst1[0] = src_r0[2];
905 dst1[1] = src_r0[3];
906 dst1[2] = src_r1[0];
907 dst1[3] = src_r1[1];
908 break;
909
910 case 9:
911 dst0[2] = src_l0[2]
912 | src_r0[0] << 8;
913 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
914 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3);
915 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3);
916 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3);
917 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3);
918 break;
919
920 case 10:
921 dst0[2] = src_l0[2]
922 | src_r0[0] << 16;
923 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
924 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2);
925 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2);
926 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2);
927 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2);
928 break;
929
930 case 11:
931 dst0[2] = src_l0[2]
932 | src_r0[0] << 24;
933 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
934 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1);
935 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1);
936 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1);
937 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1);
938 break;
939
940 case 12:
941 dst0[3] = src_r0[0];
942 dst1[0] = src_r0[1];
943 dst1[1] = src_r0[2];
944 dst1[2] = src_r0[3];
945 dst1[3] = src_r1[0];
946 break;
947
948 case 13:
949 dst0[3] = src_l0[3]
950 | src_r0[0] << 8;
951 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3);
952 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3);
953 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3);
954 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3);
955 break;
956
957 case 14:
958 dst0[3] = src_l0[3]
959 | src_r0[0] << 16;
960 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2);
961 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 2);
962 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2);
963 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2);
964 break;
965
966 case 15:
967 dst0[3] = src_l0[3]
968 | src_r0[0] << 24;
969 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1);
970 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1);
971 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1);
972 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1);
973 break;
974
975 case 16:
976 dst1[0] = src_r0[0];
977 dst1[1] = src_r0[1];
978 dst1[2] = src_r0[2];
979 dst1[3] = src_r0[3];
980 break;
981
982 case 17:
983 dst1[0] = src_l1[0]
984 | src_r0[0] << 8;
985 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
986 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
987 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
988 break;
989
990 case 18:
991 dst1[0] = src_l1[0]
992 | src_r0[0] << 16;
993 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
994 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
995 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
996 break;
997
998 case 19:
999 dst1[0] = src_l1[0]
1000 | src_r0[0] << 24;
1001 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
1002 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
1003 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
1004 break;
1005
1006 case 20:
1007 dst1[1] = src_r0[0];
1008 dst1[2] = src_r0[1];
1009 dst1[3] = src_r0[2];
1010 break;
1011
1012 case 21:
1013 dst1[1] = src_l1[1]
1014 | src_r0[0] << 8;
1015 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
1016 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
1017 break;
1018
1019 case 22:
1020 dst1[1] = src_l1[1]
1021 | src_r0[0] << 16;
1022 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
1023 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
1024 break;
1025
1026 case 23:
1027 dst1[1] = src_l1[1]
1028 | src_r0[0] << 24;
1029 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
1030 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
1031 break;
1032
1033 case 24:
1034 dst1[2] = src_r0[0];
1035 dst1[3] = src_r0[1];
1036 break;
1037
1038 case 25:
1039 dst1[2] = src_l1[2]
1040 | src_r0[0] << 8;
1041 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
1042 break;
1043
1044 case 26:
1045 dst1[2] = src_l1[2]
1046 | src_r0[0] << 16;
1047 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
1048 break;
1049
1050 case 27:
1051 dst1[2] = src_l1[2]
1052 | src_r0[0] << 24;
1053 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
1054 break;
1055
1056 case 28:
1057 dst1[3] = src_r0[0];
1058 break;
1059
1060 case 29:
1061 dst1[3] = src_l1[3]
1062 | src_r0[0] << 8;
1063 break;
1064
1065 case 30:
1066 dst1[3] = src_l1[3]
1067 | src_r0[0] << 16;
1068 break;
1069
1070 case 31:
1071 dst1[3] = src_l1[3]
1072 | src_r0[0] << 24;
1073 break;
1074 }
1075 }
1076
1077 static void reverse_block (u32x in0[4], u32x in1[4], u32x out0[4], u32x out1[4], const u32 len)
1078 {
1079 rshift_block_N (in0, in1, out0, out1, 32 - len);
1080
1081 u32x tib40[4];
1082 u32x tib41[4];
1083
1084 tib40[0] = out1[3];
1085 tib40[1] = out1[2];
1086 tib40[2] = out1[1];
1087 tib40[3] = out1[0];
1088 tib41[0] = out0[3];
1089 tib41[1] = out0[2];
1090 tib41[2] = out0[1];
1091 tib41[3] = out0[0];
1092
1093 out0[0] = swap_workaround (tib40[0]);
1094 out0[1] = swap_workaround (tib40[1]);
1095 out0[2] = swap_workaround (tib40[2]);
1096 out0[3] = swap_workaround (tib40[3]);
1097 out1[0] = swap_workaround (tib41[0]);
1098 out1[1] = swap_workaround (tib41[1]);
1099 out1[2] = swap_workaround (tib41[2]);
1100 out1[3] = swap_workaround (tib41[3]);
1101 }
1102
1103 static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1104 {
1105 buf0[0] |= (generate_cmask (buf0[0]));
1106 buf0[1] |= (generate_cmask (buf0[1]));
1107 buf0[2] |= (generate_cmask (buf0[2]));
1108 buf0[3] |= (generate_cmask (buf0[3]));
1109 buf1[0] |= (generate_cmask (buf1[0]));
1110 buf1[1] |= (generate_cmask (buf1[1]));
1111 buf1[2] |= (generate_cmask (buf1[2]));
1112 buf1[3] |= (generate_cmask (buf1[3]));
1113
1114 return in_len;
1115 }
1116
1117 static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1118 {
1119 buf0[0] &= ~(generate_cmask (buf0[0]));
1120 buf0[1] &= ~(generate_cmask (buf0[1]));
1121 buf0[2] &= ~(generate_cmask (buf0[2]));
1122 buf0[3] &= ~(generate_cmask (buf0[3]));
1123 buf1[0] &= ~(generate_cmask (buf1[0]));
1124 buf1[1] &= ~(generate_cmask (buf1[1]));
1125 buf1[2] &= ~(generate_cmask (buf1[2]));
1126 buf1[3] &= ~(generate_cmask (buf1[3]));
1127
1128 return in_len;
1129 }
1130
1131 static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1132 {
1133 rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
1134
1135 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
1136
1137 return in_len;
1138 }
1139
1140 static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1141 {
1142 rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
1143
1144 buf0[0] |= (0x00000020 & generate_cmask (buf0[0]));
1145
1146 return in_len;
1147 }
1148
1149 static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1150 {
1151 buf0[0] ^= (generate_cmask (buf0[0]));
1152 buf0[1] ^= (generate_cmask (buf0[1]));
1153 buf0[2] ^= (generate_cmask (buf0[2]));
1154 buf0[3] ^= (generate_cmask (buf0[3]));
1155 buf1[0] ^= (generate_cmask (buf1[0]));
1156 buf1[1] ^= (generate_cmask (buf1[1]));
1157 buf1[2] ^= (generate_cmask (buf1[2]));
1158 buf1[3] ^= (generate_cmask (buf1[3]));
1159
1160 return in_len;
1161 }
1162
1163 static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1164 {
1165 if (p0 >= in_len) return (in_len);
1166
1167 const u32 tmp = 0x20 << ((p0 & 3) * 8);
1168
1169 switch (p0 / 4)
1170 {
1171 case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break;
1172 case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break;
1173 case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break;
1174 case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break;
1175 case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break;
1176 case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break;
1177 case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break;
1178 case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break;
1179 }
1180
1181 return in_len;
1182 }
1183
1184 static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1185 {
1186 reverse_block (buf0, buf1, buf0, buf1, in_len);
1187
1188 return in_len;
1189 }
1190
1191 static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1192 {
1193 if ((in_len + in_len) >= 32) return (in_len);
1194
1195 u32 out_len = in_len;
1196
1197 u32x tib40[4];
1198 u32x tib41[4];
1199
1200 tib40[0] = buf0[0];
1201 tib40[1] = buf0[1];
1202 tib40[2] = buf0[2];
1203 tib40[3] = buf0[3];
1204 tib41[0] = buf1[0];
1205 tib41[1] = buf1[1];
1206 tib41[2] = buf1[2];
1207 tib41[3] = buf1[3];
1208
1209 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
1210
1211 out_len += in_len;
1212
1213 return out_len;
1214 }
1215
1216 static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1217 {
1218 if (((in_len * p0) + in_len) >= 32) return (in_len);
1219
1220 u32 out_len = in_len;
1221
1222 u32x tib40[4];
1223 u32x tib41[4];
1224
1225 tib40[0] = buf0[0];
1226 tib40[1] = buf0[1];
1227 tib40[2] = buf0[2];
1228 tib40[3] = buf0[3];
1229 tib41[0] = buf1[0];
1230 tib41[1] = buf1[1];
1231 tib41[2] = buf1[2];
1232 tib41[3] = buf1[3];
1233
1234 for (u32 i = 0; i < p0; i++)
1235 {
1236 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
1237
1238 out_len += in_len;
1239 }
1240
1241 return out_len;
1242 }
1243
1244 static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1245 {
1246 if ((in_len + in_len) >= 32) return (in_len);
1247
1248 u32 out_len = in_len;
1249
1250 u32x tib40[4];
1251 u32x tib41[4];
1252
1253 reverse_block (buf0, buf1, tib40, tib41, out_len);
1254
1255 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
1256
1257 out_len += in_len;
1258
1259 return out_len;
1260 }
1261
1262 static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1263 {
1264 if ((in_len + 1) >= 32) return (in_len);
1265
1266 u32 out_len = in_len;
1267
1268 append_block1 (out_len, buf0, buf1, p0);
1269
1270 out_len++;
1271
1272 return out_len;
1273 }
1274
1275 static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1276 {
1277 if ((in_len + 1) >= 32) return (in_len);
1278
1279 u32 out_len = in_len;
1280
1281 rshift_block (buf0, buf1, buf0, buf1);
1282
1283 buf0[0] = buf0[0] | p0;
1284
1285 out_len++;
1286
1287 return out_len;
1288 }
1289
1290 static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1291 {
1292 if (in_len == 0) return (in_len);
1293
1294 const u32 in_len1 = in_len - 1;
1295
1296 const u32 sh = (in_len1 & 3) * 8;
1297
1298 const u32x tmp = (buf0[0] & 0xff) << sh;
1299
1300 lshift_block (buf0, buf1, buf0, buf1);
1301
1302 switch (in_len1 / 4)
1303 {
1304 case 0: buf0[0] |= tmp; break;
1305 case 1: buf0[1] |= tmp; break;
1306 case 2: buf0[2] |= tmp; break;
1307 case 3: buf0[3] |= tmp; break;
1308 case 4: buf1[0] |= tmp; break;
1309 case 5: buf1[1] |= tmp; break;
1310 case 6: buf1[2] |= tmp; break;
1311 case 7: buf1[3] |= tmp; break;
1312 }
1313
1314 return in_len;
1315 }
1316
1317 static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1318 {
1319 if (in_len == 0) return (in_len);
1320
1321 const u32 in_len1 = in_len - 1;
1322
1323 const u32 sh = (in_len1 & 3) * 8;
1324
1325 u32x tmp = 0;
1326
1327 switch (in_len1 / 4)
1328 {
1329 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
1330 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
1331 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
1332 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
1333 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
1334 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
1335 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
1336 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
1337 }
1338
1339 rshift_block (buf0, buf1, buf0, buf1);
1340
1341 buf0[0] |= tmp;
1342
1343 truncate_right (buf0, buf1, in_len);
1344
1345 return in_len;
1346 }
1347
1348 static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1349 {
1350 if (in_len == 0) return (in_len);
1351
1352 const u32 in_len1 = in_len - 1;
1353
1354 lshift_block (buf0, buf1, buf0, buf1);
1355
1356 return in_len1;
1357 }
1358
1359 static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1360 {
1361 if (in_len == 0) return (in_len);
1362
1363 const u32 in_len1 = in_len - 1;
1364
1365 const u32 tmp = (1 << ((in_len1 & 3) * 8)) - 1;
1366
1367 switch (in_len1 / 4)
1368 {
1369 case 0: buf0[0] &= tmp; break;
1370 case 1: buf0[1] &= tmp; break;
1371 case 2: buf0[2] &= tmp; break;
1372 case 3: buf0[3] &= tmp; break;
1373 case 4: buf1[0] &= tmp; break;
1374 case 5: buf1[1] &= tmp; break;
1375 case 6: buf1[2] &= tmp; break;
1376 case 7: buf1[3] &= tmp; break;
1377 }
1378
1379 return in_len1;
1380 }
1381
1382 static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1383 {
1384 if (p0 >= in_len) return (in_len);
1385
1386 u32 out_len = in_len;
1387
1388 u32x tib40[4];
1389 u32x tib41[4];
1390
1391 lshift_block (buf0, buf1, tib40, tib41);
1392
1393 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
1394 const u32 mr = ~ml;
1395
1396 switch (p0 / 4)
1397 {
1398 case 0: buf0[0] = (buf0[0] & ml)
1399 | (tib40[0] & mr);
1400 buf0[1] = tib40[1];
1401 buf0[2] = tib40[2];
1402 buf0[3] = tib40[3];
1403 buf1[0] = tib41[0];
1404 buf1[1] = tib41[1];
1405 buf1[2] = tib41[2];
1406 buf1[3] = tib41[3];
1407 break;
1408 case 1: buf0[1] = (buf0[1] & ml)
1409 | (tib40[1] & mr);
1410 buf0[2] = tib40[2];
1411 buf0[3] = tib40[3];
1412 buf1[0] = tib41[0];
1413 buf1[1] = tib41[1];
1414 buf1[2] = tib41[2];
1415 buf1[3] = tib41[3];
1416 break;
1417 case 2: buf0[2] = (buf0[2] & ml)
1418 | (tib40[2] & mr);
1419 buf0[3] = tib40[3];
1420 buf1[0] = tib41[0];
1421 buf1[1] = tib41[1];
1422 buf1[2] = tib41[2];
1423 buf1[3] = tib41[3];
1424 break;
1425 case 3: buf0[3] = (buf0[3] & ml)
1426 | (tib40[3] & mr);
1427 buf1[0] = tib41[0];
1428 buf1[1] = tib41[1];
1429 buf1[2] = tib41[2];
1430 buf1[3] = tib41[3];
1431 break;
1432 case 4: buf1[0] = (buf1[0] & ml)
1433 | (tib41[0] & mr);
1434 buf1[1] = tib41[1];
1435 buf1[2] = tib41[2];
1436 buf1[3] = tib41[3];
1437 break;
1438 case 5: buf1[1] = (buf1[1] & ml)
1439 | (tib41[1] & mr);
1440 buf1[2] = tib41[2];
1441 buf1[3] = tib41[3];
1442 break;
1443 case 6: buf1[2] = (buf1[2] & ml)
1444 | (tib41[2] & mr);
1445 buf1[3] = tib41[3];
1446 break;
1447 case 7: buf1[3] = (buf1[3] & ml)
1448 | (tib41[3] & mr);
1449 break;
1450 }
1451
1452 out_len--;
1453
1454 return out_len;
1455 }
1456
1457 static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1458 {
1459 if (p0 >= in_len) return (in_len);
1460
1461 if ((p0 + p1) > in_len) return (in_len);
1462
1463 u32 out_len = p1;
1464
1465 lshift_block_N (buf0, buf1, buf0, buf1, p0);
1466
1467 truncate_right (buf0, buf1, out_len);
1468
1469 return out_len;
1470 }
1471
1472 static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1473 {
1474 if (p0 >= in_len) return (in_len);
1475
1476 if ((p0 + p1) > in_len) return (in_len);
1477
1478 u32 out_len = in_len;
1479
1480 u32x tib40[4];
1481 u32x tib41[4];
1482
1483 tib40[0] = 0;
1484 tib40[1] = 0;
1485 tib40[2] = 0;
1486 tib40[3] = 0;
1487 tib41[0] = 0;
1488 tib41[1] = 0;
1489 tib41[2] = 0;
1490 tib41[3] = 0;
1491
1492 lshift_block_N (buf0, buf1, tib40, tib41, p1);
1493
1494 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
1495 const u32 mr = ~ml;
1496
1497 switch (p0 / 4)
1498 {
1499 case 0: buf0[0] = (buf0[0] & ml)
1500 | (tib40[0] & mr);
1501 buf0[1] = tib40[1];
1502 buf0[2] = tib40[2];
1503 buf0[3] = tib40[3];
1504 buf1[0] = tib41[0];
1505 buf1[1] = tib41[1];
1506 buf1[2] = tib41[2];
1507 buf1[3] = tib41[3];
1508 break;
1509 case 1: buf0[1] = (buf0[1] & ml)
1510 | (tib40[1] & mr);
1511 buf0[2] = tib40[2];
1512 buf0[3] = tib40[3];
1513 buf1[0] = tib41[0];
1514 buf1[1] = tib41[1];
1515 buf1[2] = tib41[2];
1516 buf1[3] = tib41[3];
1517 break;
1518 case 2: buf0[2] = (buf0[2] & ml)
1519 | (tib40[2] & mr);
1520 buf0[3] = tib40[3];
1521 buf1[0] = tib41[0];
1522 buf1[1] = tib41[1];
1523 buf1[2] = tib41[2];
1524 buf1[3] = tib41[3];
1525 break;
1526 case 3: buf0[3] = (buf0[3] & ml)
1527 | (tib40[3] & mr);
1528 buf1[0] = tib41[0];
1529 buf1[1] = tib41[1];
1530 buf1[2] = tib41[2];
1531 buf1[3] = tib41[3];
1532 break;
1533 case 4: buf1[0] = (buf1[0] & ml)
1534 | (tib41[0] & mr);
1535 buf1[1] = tib41[1];
1536 buf1[2] = tib41[2];
1537 buf1[3] = tib41[3];
1538 break;
1539 case 5: buf1[1] = (buf1[1] & ml)
1540 | (tib41[1] & mr);
1541 buf1[2] = tib41[2];
1542 buf1[3] = tib41[3];
1543 break;
1544 case 6: buf1[2] = (buf1[2] & ml)
1545 | (tib41[2] & mr);
1546 buf1[3] = tib41[3];
1547 break;
1548 case 7: buf1[3] = (buf1[3] & ml)
1549 | (tib41[3] & mr);
1550 break;
1551 }
1552
1553 out_len -= p1;
1554
1555 return out_len;
1556 }
1557
1558 static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1559 {
1560 if (p0 > in_len) return (in_len);
1561
1562 if ((in_len + 1) >= 32) return (in_len);
1563
1564 u32 out_len = in_len;
1565
1566 u32x tib40[4];
1567 u32x tib41[4];
1568
1569 rshift_block (buf0, buf1, tib40, tib41);
1570
1571 const u32 p1n = p1 << ((p0 & 3) * 8);
1572
1573 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
1574
1575 const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
1576
1577 switch (p0 / 4)
1578 {
1579 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
1580 buf0[1] = tib40[1];
1581 buf0[2] = tib40[2];
1582 buf0[3] = tib40[3];
1583 buf1[0] = tib41[0];
1584 buf1[1] = tib41[1];
1585 buf1[2] = tib41[2];
1586 buf1[3] = tib41[3];
1587 break;
1588 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr);
1589 buf0[2] = tib40[2];
1590 buf0[3] = tib40[3];
1591 buf1[0] = tib41[0];
1592 buf1[1] = tib41[1];
1593 buf1[2] = tib41[2];
1594 buf1[3] = tib41[3];
1595 break;
1596 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr);
1597 buf0[3] = tib40[3];
1598 buf1[0] = tib41[0];
1599 buf1[1] = tib41[1];
1600 buf1[2] = tib41[2];
1601 buf1[3] = tib41[3];
1602 break;
1603 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr);
1604 buf1[0] = tib41[0];
1605 buf1[1] = tib41[1];
1606 buf1[2] = tib41[2];
1607 buf1[3] = tib41[3];
1608 break;
1609 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr);
1610 buf1[1] = tib41[1];
1611 buf1[2] = tib41[2];
1612 buf1[3] = tib41[3];
1613 break;
1614 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr);
1615 buf1[2] = tib41[2];
1616 buf1[3] = tib41[3];
1617 break;
1618 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr);
1619 buf1[3] = tib41[3];
1620 break;
1621 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr);
1622 break;
1623 }
1624
1625 out_len++;
1626
1627 return out_len;
1628 }
1629
1630 static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1631 {
1632 if (p0 >= in_len) return (in_len);
1633
1634 const u32 p1n = p1 << ((p0 & 3) * 8);
1635
1636 const u32 m = ~(0xff << ((p0 & 3) * 8));
1637
1638 switch (p0 / 4)
1639 {
1640 case 0: buf0[0] = (buf0[0] & m) | p1n; break;
1641 case 1: buf0[1] = (buf0[1] & m) | p1n; break;
1642 case 2: buf0[2] = (buf0[2] & m) | p1n; break;
1643 case 3: buf0[3] = (buf0[3] & m) | p1n; break;
1644 case 4: buf1[0] = (buf1[0] & m) | p1n; break;
1645 case 5: buf1[1] = (buf1[1] & m) | p1n; break;
1646 case 6: buf1[2] = (buf1[2] & m) | p1n; break;
1647 case 7: buf1[3] = (buf1[3] & m) | p1n; break;
1648 }
1649
1650 return in_len;
1651 }
1652
1653 static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1654 {
1655 if (p0 >= in_len) return (in_len);
1656
1657 truncate_right (buf0, buf1, p0);
1658
1659 return p0;
1660 }
1661
1662 static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1663 {
1664 #ifdef VECT_SIZE1
1665
1666 const uchar4 tmp0 = (uchar4) (p0);
1667 const uchar4 tmp1 = (uchar4) (p1);
1668
1669 uchar4 tmp;
1670
1671 tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp);
1672 tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp);
1673 tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp);
1674 tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp);
1675 tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp);
1676 tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp);
1677 tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp);
1678 tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp);
1679
1680 #endif
1681
1682 #ifdef VECT_SIZE2
1683
1684 const uchar8 tmp0 = (uchar8) (p0);
1685 const uchar8 tmp1 = (uchar8) (p1);
1686
1687 uchar8 tmp;
1688
1689 tmp = as_uchar8 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint2 (tmp);
1690 tmp = as_uchar8 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint2 (tmp);
1691 tmp = as_uchar8 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint2 (tmp);
1692 tmp = as_uchar8 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint2 (tmp);
1693 tmp = as_uchar8 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint2 (tmp);
1694 tmp = as_uchar8 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint2 (tmp);
1695 tmp = as_uchar8 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint2 (tmp);
1696 tmp = as_uchar8 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint2 (tmp);
1697
1698 #endif
1699
1700 #ifdef VECT_SIZE4
1701
1702 const uchar16 tmp0 = (uchar16) (p0);
1703 const uchar16 tmp1 = (uchar16) (p1);
1704
1705 uchar16 tmp;
1706
1707 tmp = as_uchar16 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint4 (tmp);
1708 tmp = as_uchar16 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint4 (tmp);
1709 tmp = as_uchar16 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint4 (tmp);
1710 tmp = as_uchar16 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint4 (tmp);
1711 tmp = as_uchar16 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint4 (tmp);
1712 tmp = as_uchar16 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint4 (tmp);
1713 tmp = as_uchar16 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint4 (tmp);
1714 tmp = as_uchar16 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint4 (tmp);
1715
1716 #endif
1717
1718 return in_len;
1719 }
1720
1721 static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1722 {
1723 // TODO
1724 return in_len;
1725 }
1726
1727 static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1728 {
1729 // TODO
1730 return in_len;
1731 }
1732
1733 static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1734 {
1735 if ( in_len == 0) return (in_len);
1736 if ((in_len + p0) >= 32) return (in_len);
1737
1738 u32 out_len = in_len;
1739
1740 const u32x tmp = buf0[0] & 0xFF;
1741
1742 rshift_block_N (buf0, buf1, buf0, buf1, p0);
1743
1744 switch (p0)
1745 {
1746 case 1: buf0[0] |= tmp << 0;
1747 break;
1748 case 2: buf0[0] |= tmp << 0 | tmp << 8;
1749 break;
1750 case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16;
1751 break;
1752 case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1753 break;
1754 case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1755 buf0[1] |= tmp << 0;
1756 break;
1757 case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1758 buf0[1] |= tmp << 0 | tmp << 8;
1759 break;
1760 case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1761 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16;
1762 break;
1763 case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1764 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1765 break;
1766 case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1767 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1768 buf0[2] |= tmp << 0;
1769 break;
1770 case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1771 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1772 buf0[2] |= tmp << 0 | tmp << 8;
1773 break;
1774 case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1775 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1776 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16;
1777 break;
1778 case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1779 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1780 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1781 break;
1782 case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1783 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1784 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1785 buf0[3] |= tmp << 0;
1786 break;
1787 case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1788 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1789 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1790 buf0[3] |= tmp << 0 | tmp << 8;
1791 break;
1792 case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1793 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1794 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1795 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16;
1796 break;
1797 case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1798 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1799 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1800 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1801 break;
1802 case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1803 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1804 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1805 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1806 buf1[0] |= tmp << 0;
1807 break;
1808 case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1809 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1810 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1811 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1812 buf1[0] |= tmp << 0 | tmp << 8;
1813 break;
1814 case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1815 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1816 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1817 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1818 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16;
1819 break;
1820 case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1821 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1822 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1823 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1824 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1825 break;
1826 case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1827 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1828 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1829 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1830 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1831 buf1[1] |= tmp << 0;
1832 break;
1833 case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1834 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1835 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1836 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1837 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1838 buf1[1] |= tmp << 0 | tmp << 8;
1839 break;
1840 case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1841 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1842 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1843 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1844 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1845 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16;
1846 break;
1847 case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1848 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1849 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1850 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1851 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1852 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1853 break;
1854 case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1855 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1856 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1857 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1858 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1859 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1860 buf1[2] |= tmp << 0;
1861 break;
1862 case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1863 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1864 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1865 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1866 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1867 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1868 buf1[2] |= tmp << 0 | tmp << 8;
1869 break;
1870 case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1871 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1872 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1873 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1874 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1875 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1876 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16;
1877 break;
1878 case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1879 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1880 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1881 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1882 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1883 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1884 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1885 break;
1886 case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1887 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1888 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1889 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1890 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1891 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1892 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1893 buf1[3] |= tmp << 0;
1894 break;
1895 case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1896 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1897 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1898 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1899 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1900 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1901 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1902 buf1[3] |= tmp << 0 | tmp << 8;
1903 break;
1904 case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1905 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1906 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1907 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1908 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1909 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1910 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
1911 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16;
1912 break;
1913 }
1914
1915 out_len += p0;
1916
1917 return out_len;
1918 }
1919
1920 static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1921 {
1922 if ( in_len == 0) return (in_len);
1923 if ((in_len + p0) >= 32) return (in_len);
1924
1925 const u32 in_len1 = in_len - 1;
1926
1927 const u32 sh = (in_len1 & 3) * 8;
1928
1929 u32x tmp = 0;
1930
1931 switch (in_len1 / 4)
1932 {
1933 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
1934 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
1935 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
1936 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
1937 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
1938 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
1939 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
1940 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
1941 }
1942
1943 u32 out_len = in_len;
1944
1945 for (u32 i = 0; i < p0; i++)
1946 {
1947 append_block1 (out_len, buf0, buf1, tmp);
1948
1949 out_len++;
1950 }
1951
1952 return out_len;
1953 }
1954
1955 static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1956 {
1957 if ( in_len == 0) return (in_len);
1958 if ((in_len + in_len) >= 32) return (in_len);
1959
1960 u32 out_len = in_len;
1961
1962 u32x tib40[4];
1963 u32x tib41[4];
1964
1965 tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8);
1966 tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8);
1967 tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8);
1968 tib40[3] = ((buf0[1] & 0x00FF0000) >> 16) | ((buf0[1] & 0xFF000000) >> 8);
1969 tib41[0] = ((buf0[2] & 0x000000FF) << 0) | ((buf0[2] & 0x0000FF00) << 8);
1970 tib41[1] = ((buf0[2] & 0x00FF0000) >> 16) | ((buf0[2] & 0xFF000000) >> 8);
1971 tib41[2] = ((buf0[3] & 0x000000FF) << 0) | ((buf0[3] & 0x0000FF00) << 8);
1972 tib41[3] = ((buf0[3] & 0x00FF0000) >> 16) | ((buf0[3] & 0xFF000000) >> 8);
1973
1974 buf0[0] = tib40[0] | (tib40[0] << 8);
1975 buf0[1] = tib40[1] | (tib40[1] << 8);
1976 buf0[2] = tib40[2] | (tib40[2] << 8);
1977 buf0[3] = tib40[3] | (tib40[3] << 8);
1978 buf1[0] = tib41[0] | (tib41[0] << 8);
1979 buf1[1] = tib41[1] | (tib41[1] << 8);
1980 buf1[2] = tib41[2] | (tib41[2] << 8);
1981 buf1[3] = tib41[3] | (tib41[3] << 8);
1982
1983 out_len = out_len + out_len;
1984
1985 return out_len;
1986 }
1987
1988 static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1989 {
1990 if (in_len < 2) return (in_len);
1991
1992 buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
1993
1994 return in_len;
1995 }
1996
1997 static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
1998 {
1999 if (in_len < 2) return (in_len);
2000
2001 switch (in_len)
2002 {
2003 case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
2004 break;
2005 case 3: buf0[0] = (buf0[0] & 0x000000FF) | ((buf0[0] << 8) & 0x00FF0000) | ((buf0[0] >> 8) & 0x0000FF00);
2006 break;
2007 case 4: buf0[0] = (buf0[0] & 0x0000FFFF) | ((buf0[0] << 8) & 0xFF000000) | ((buf0[0] >> 8) & 0x00FF0000);
2008 break;
2009 case 5: buf0[1] = (buf0[0] & 0xFF000000) | buf0[1];
2010 buf0[0] = (buf0[0] & 0x00FFFFFF) | (buf0[1] << 24);
2011 buf0[1] = (buf0[1] >> 24);
2012 break;
2013 case 6: buf0[1] = ((buf0[1] << 8) & 0x0000FF00) | ((buf0[1] >> 8) & 0x000000FF);
2014 break;
2015 case 7: buf0[1] = (buf0[1] & 0x000000FF) | ((buf0[1] << 8) & 0x00FF0000) | ((buf0[1] >> 8) & 0x0000FF00);
2016 break;
2017 case 8: buf0[1] = (buf0[1] & 0x0000FFFF) | ((buf0[1] << 8) & 0xFF000000) | ((buf0[1] >> 8) & 0x00FF0000);
2018 break;
2019 case 9: buf0[2] = (buf0[1] & 0xFF000000) | buf0[2];
2020 buf0[1] = (buf0[1] & 0x00FFFFFF) | (buf0[2] << 24);
2021 buf0[2] = (buf0[2] >> 24);
2022 break;
2023 case 10: buf0[2] = ((buf0[2] << 8) & 0x0000FF00) | ((buf0[2] >> 8) & 0x000000FF);
2024 break;
2025 case 11: buf0[2] = (buf0[2] & 0x000000FF) | ((buf0[2] << 8) & 0x00FF0000) | ((buf0[2] >> 8) & 0x0000FF00);
2026 break;
2027 case 12: buf0[2] = (buf0[2] & 0x0000FFFF) | ((buf0[2] << 8) & 0xFF000000) | ((buf0[2] >> 8) & 0x00FF0000);
2028 break;
2029 case 13: buf0[3] = (buf0[2] & 0xFF000000) | buf0[3];
2030 buf0[2] = (buf0[2] & 0x00FFFFFF) | (buf0[3] << 24);
2031 buf0[3] = (buf0[3] >> 24);
2032 break;
2033 case 14: buf0[3] = ((buf0[3] << 8) & 0x0000FF00) | ((buf0[3] >> 8) & 0x000000FF);
2034 break;
2035 case 15: buf0[3] = (buf0[3] & 0x000000FF) | ((buf0[3] << 8) & 0x00FF0000) | ((buf0[3] >> 8) & 0x0000FF00);
2036 break;
2037 case 16: buf0[3] = (buf0[3] & 0x0000FFFF) | ((buf0[3] << 8) & 0xFF000000) | ((buf0[3] >> 8) & 0x00FF0000);
2038 break;
2039 case 17: buf1[0] = (buf0[3] & 0xFF000000) | buf1[0];
2040 buf0[3] = (buf0[3] & 0x00FFFFFF) | (buf1[0] << 24);
2041 buf1[0] = (buf1[0] >> 24);
2042 break;
2043 case 18: buf1[0] = ((buf1[0] << 8) & 0x0000FF00) | ((buf1[0] >> 8) & 0x000000FF);
2044 break;
2045 case 19: buf1[0] = (buf1[0] & 0x000000FF) | ((buf1[0] << 8) & 0x00FF0000) | ((buf1[0] >> 8) & 0x0000FF00);
2046 break;
2047 case 20: buf1[0] = (buf1[0] & 0x0000FFFF) | ((buf1[0] << 8) & 0xFF000000) | ((buf1[0] >> 8) & 0x00FF0000);
2048 break;
2049 case 21: buf1[1] = (buf1[0] & 0xFF000000) | buf1[1];
2050 buf1[0] = (buf1[0] & 0x00FFFFFF) | (buf1[1] << 24);
2051 buf1[1] = (buf1[1] >> 24);
2052 break;
2053 case 22: buf1[1] = ((buf1[1] << 8) & 0x0000FF00) | ((buf1[1] >> 8) & 0x000000FF);
2054 break;
2055 case 23: buf1[1] = (buf1[1] & 0x000000FF) | ((buf1[1] << 8) & 0x00FF0000) | ((buf1[1] >> 8) & 0x0000FF00);
2056 break;
2057 case 24: buf1[1] = (buf1[1] & 0x0000FFFF) | ((buf1[1] << 8) & 0xFF000000) | ((buf1[1] >> 8) & 0x00FF0000);
2058 break;
2059 case 25: buf1[2] = (buf1[1] & 0xFF000000) | buf1[2];
2060 buf1[1] = (buf1[1] & 0x00FFFFFF) | (buf1[2] << 24);
2061 buf1[2] = (buf1[2] >> 24);
2062 break;
2063 case 26: buf1[2] = ((buf1[2] << 8) & 0x0000FF00) | ((buf1[2] >> 8) & 0x000000FF);
2064 break;
2065 case 27: buf1[2] = (buf1[2] & 0x000000FF) | ((buf1[2] << 8) & 0x00FF0000) | ((buf1[2] >> 8) & 0x0000FF00);
2066 break;
2067 case 28: buf1[2] = (buf1[2] & 0x0000FFFF) | ((buf1[2] << 8) & 0xFF000000) | ((buf1[2] >> 8) & 0x00FF0000);
2068 break;
2069 case 29: buf1[3] = (buf1[2] & 0xFF000000) | buf1[3];
2070 buf1[2] = (buf1[2] & 0x00FFFFFF) | (buf1[3] << 24);
2071 buf1[3] = (buf1[3] >> 24);
2072 break;
2073 case 30: buf1[3] = ((buf1[3] << 8) & 0x0000FF00) | ((buf1[3] >> 8) & 0x000000FF);
2074 break;
2075 case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00);
2076 break;
2077 }
2078
2079 return in_len;
2080 }
2081
2082 static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2083 {
2084 if (p0 >= in_len) return (in_len);
2085 if (p1 >= in_len) return (in_len);
2086
2087 u32x tmp0 = 0;
2088 u32x tmp1 = 0;
2089
2090 switch (p0)
2091 {
2092 case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
2093 break;
2094 case 1: tmp0 = (buf0[0] >> 8) & 0xFF;
2095 break;
2096 case 2: tmp0 = (buf0[0] >> 16) & 0xFF;
2097 break;
2098 case 3: tmp0 = (buf0[0] >> 24) & 0xFF;
2099 break;
2100 case 4: tmp0 = (buf0[1] >> 0) & 0xFF;
2101 break;
2102 case 5: tmp0 = (buf0[1] >> 8) & 0xFF;
2103 break;
2104 case 6: tmp0 = (buf0[1] >> 16) & 0xFF;
2105 break;
2106 case 7: tmp0 = (buf0[1] >> 24) & 0xFF;
2107 break;
2108 case 8: tmp0 = (buf0[2] >> 0) & 0xFF;
2109 break;
2110 case 9: tmp0 = (buf0[2] >> 8) & 0xFF;
2111 break;
2112 case 10: tmp0 = (buf0[2] >> 16) & 0xFF;
2113 break;
2114 case 11: tmp0 = (buf0[2] >> 24) & 0xFF;
2115 break;
2116 case 12: tmp0 = (buf0[3] >> 0) & 0xFF;
2117 break;
2118 case 13: tmp0 = (buf0[3] >> 8) & 0xFF;
2119 break;
2120 case 14: tmp0 = (buf0[3] >> 16) & 0xFF;
2121 break;
2122 case 15: tmp0 = (buf0[3] >> 24) & 0xFF;
2123 break;
2124 case 16: tmp0 = (buf1[0] >> 0) & 0xFF;
2125 break;
2126 case 17: tmp0 = (buf1[0] >> 8) & 0xFF;
2127 break;
2128 case 18: tmp0 = (buf1[0] >> 16) & 0xFF;
2129 break;
2130 case 19: tmp0 = (buf1[0] >> 24) & 0xFF;
2131 break;
2132 case 20: tmp0 = (buf1[1] >> 0) & 0xFF;
2133 break;
2134 case 21: tmp0 = (buf1[1] >> 8) & 0xFF;
2135 break;
2136 case 22: tmp0 = (buf1[1] >> 16) & 0xFF;
2137 break;
2138 case 23: tmp0 = (buf1[1] >> 24) & 0xFF;
2139 break;
2140 case 24: tmp0 = (buf1[2] >> 0) & 0xFF;
2141 break;
2142 case 25: tmp0 = (buf1[2] >> 8) & 0xFF;
2143 break;
2144 case 26: tmp0 = (buf1[2] >> 16) & 0xFF;
2145 break;
2146 case 27: tmp0 = (buf1[2] >> 24) & 0xFF;
2147 break;
2148 case 28: tmp0 = (buf1[3] >> 0) & 0xFF;
2149 break;
2150 case 29: tmp0 = (buf1[3] >> 8) & 0xFF;
2151 break;
2152 case 30: tmp0 = (buf1[3] >> 16) & 0xFF;
2153 break;
2154 case 31: tmp0 = (buf1[3] >> 24) & 0xFF;
2155 break;
2156 }
2157
2158 switch (p1)
2159 {
2160 case 0: tmp1 = (buf0[0] >> 0) & 0xff;
2161 buf0[0] = (buf0[0] & 0xffffff00) | tmp0 << 0;
2162 break;
2163 case 1: tmp1 = (buf0[0] >> 8) & 0xff;
2164 buf0[0] = (buf0[0] & 0xffff00ff) | tmp0 << 8;
2165 break;
2166 case 2: tmp1 = (buf0[0] >> 16) & 0xff;
2167 buf0[0] = (buf0[0] & 0xff00ffff) | tmp0 << 16;
2168 break;
2169 case 3: tmp1 = (buf0[0] >> 24) & 0xff;
2170 buf0[0] = (buf0[0] & 0x00ffffff) | tmp0 << 24;
2171 break;
2172 case 4: tmp1 = (buf0[1] >> 0) & 0xff;
2173 buf0[1] = (buf0[1] & 0xffffff00) | tmp0 << 0;
2174 break;
2175 case 5: tmp1 = (buf0[1] >> 8) & 0xff;
2176 buf0[1] = (buf0[1] & 0xffff00ff) | tmp0 << 8;
2177 break;
2178 case 6: tmp1 = (buf0[1] >> 16) & 0xff;
2179 buf0[1] = (buf0[1] & 0xff00ffff) | tmp0 << 16;
2180 break;
2181 case 7: tmp1 = (buf0[1] >> 24) & 0xff;
2182 buf0[1] = (buf0[1] & 0x00ffffff) | tmp0 << 24;
2183 break;
2184 case 8: tmp1 = (buf0[2] >> 0) & 0xff;
2185 buf0[2] = (buf0[2] & 0xffffff00) | tmp0 << 0;
2186 break;
2187 case 9: tmp1 = (buf0[2] >> 8) & 0xff;
2188 buf0[2] = (buf0[2] & 0xffff00ff) | tmp0 << 8;
2189 break;
2190 case 10: tmp1 = (buf0[2] >> 16) & 0xff;
2191 buf0[2] = (buf0[2] & 0xff00ffff) | tmp0 << 16;
2192 break;
2193 case 11: tmp1 = (buf0[2] >> 24) & 0xff;
2194 buf0[2] = (buf0[2] & 0x00ffffff) | tmp0 << 24;
2195 break;
2196 case 12: tmp1 = (buf0[3] >> 0) & 0xff;
2197 buf0[3] = (buf0[3] & 0xffffff00) | tmp0 << 0;
2198 break;
2199 case 13: tmp1 = (buf0[3] >> 8) & 0xff;
2200 buf0[3] = (buf0[3] & 0xffff00ff) | tmp0 << 8;
2201 break;
2202 case 14: tmp1 = (buf0[3] >> 16) & 0xff;
2203 buf0[3] = (buf0[3] & 0xff00ffff) | tmp0 << 16;
2204 break;
2205 case 15: tmp1 = (buf0[3] >> 24) & 0xff;
2206 buf0[3] = (buf0[3] & 0x00ffffff) | tmp0 << 24;
2207 break;
2208 case 16: tmp1 = (buf1[0] >> 0) & 0xff;
2209 buf1[0] = (buf1[0] & 0xffffff00) | tmp0 << 0;
2210 break;
2211 case 17: tmp1 = (buf1[0] >> 8) & 0xff;
2212 buf1[0] = (buf1[0] & 0xffff00ff) | tmp0 << 8;
2213 break;
2214 case 18: tmp1 = (buf1[0] >> 16) & 0xff;
2215 buf1[0] = (buf1[0] & 0xff00ffff) | tmp0 << 16;
2216 break;
2217 case 19: tmp1 = (buf1[0] >> 24) & 0xff;
2218 buf1[0] = (buf1[0] & 0x00ffffff) | tmp0 << 24;
2219 break;
2220 case 20: tmp1 = (buf1[1] >> 0) & 0xff;
2221 buf1[1] = (buf1[1] & 0xffffff00) | tmp0 << 0;
2222 break;
2223 case 21: tmp1 = (buf1[1] >> 8) & 0xff;
2224 buf1[1] = (buf1[1] & 0xffff00ff) | tmp0 << 8;
2225 break;
2226 case 22: tmp1 = (buf1[1] >> 16) & 0xff;
2227 buf1[1] = (buf1[1] & 0xff00ffff) | tmp0 << 16;
2228 break;
2229 case 23: tmp1 = (buf1[1] >> 24) & 0xff;
2230 buf1[1] = (buf1[1] & 0x00ffffff) | tmp0 << 24;
2231 break;
2232 case 24: tmp1 = (buf1[2] >> 0) & 0xff;
2233 buf1[2] = (buf1[2] & 0xffffff00) | tmp0 << 0;
2234 break;
2235 case 25: tmp1 = (buf1[2] >> 8) & 0xff;
2236 buf1[2] = (buf1[2] & 0xffff00ff) | tmp0 << 8;
2237 break;
2238 case 26: tmp1 = (buf1[2] >> 16) & 0xff;
2239 buf1[2] = (buf1[2] & 0xff00ffff) | tmp0 << 16;
2240 break;
2241 case 27: tmp1 = (buf1[2] >> 24) & 0xff;
2242 buf1[2] = (buf1[2] & 0x00ffffff) | tmp0 << 24;
2243 break;
2244 case 28: tmp1 = (buf1[3] >> 0) & 0xff;
2245 buf1[3] = (buf1[3] & 0xffffff00) | tmp0 << 0;
2246 break;
2247 case 29: tmp1 = (buf1[3] >> 8) & 0xff;
2248 buf1[3] = (buf1[3] & 0xffff00ff) | tmp0 << 8;
2249 break;
2250 case 30: tmp1 = (buf1[3] >> 16) & 0xff;
2251 buf1[3] = (buf1[3] & 0xff00ffff) | tmp0 << 16;
2252 break;
2253 case 31: tmp1 = (buf1[3] >> 24) & 0xff;
2254 buf1[3] = (buf1[3] & 0x00ffffff) | tmp0 << 24;
2255 break;
2256 }
2257
2258 switch (p0)
2259 {
2260 case 0: buf0[0] = (buf0[0] & 0xffffff00) | tmp1 << 0;
2261 break;
2262 case 1: buf0[0] = (buf0[0] & 0xffff00ff) | tmp1 << 8;
2263 break;
2264 case 2: buf0[0] = (buf0[0] & 0xff00ffff) | tmp1 << 16;
2265 break;
2266 case 3: buf0[0] = (buf0[0] & 0x00ffffff) | tmp1 << 24;
2267 break;
2268 case 4: buf0[1] = (buf0[1] & 0xffffff00) | tmp1 << 0;
2269 break;
2270 case 5: buf0[1] = (buf0[1] & 0xffff00ff) | tmp1 << 8;
2271 break;
2272 case 6: buf0[1] = (buf0[1] & 0xff00ffff) | tmp1 << 16;
2273 break;
2274 case 7: buf0[1] = (buf0[1] & 0x00ffffff) | tmp1 << 24;
2275 break;
2276 case 8: buf0[2] = (buf0[2] & 0xffffff00) | tmp1 << 0;
2277 break;
2278 case 9: buf0[2] = (buf0[2] & 0xffff00ff) | tmp1 << 8;
2279 break;
2280 case 10: buf0[2] = (buf0[2] & 0xff00ffff) | tmp1 << 16;
2281 break;
2282 case 11: buf0[2] = (buf0[2] & 0x00ffffff) | tmp1 << 24;
2283 break;
2284 case 12: buf0[3] = (buf0[3] & 0xffffff00) | tmp1 << 0;
2285 break;
2286 case 13: buf0[3] = (buf0[3] & 0xffff00ff) | tmp1 << 8;
2287 break;
2288 case 14: buf0[3] = (buf0[3] & 0xff00ffff) | tmp1 << 16;
2289 break;
2290 case 15: buf0[3] = (buf0[3] & 0x00ffffff) | tmp1 << 24;
2291 break;
2292 case 16: buf1[0] = (buf1[0] & 0xffffff00) | tmp1 << 0;
2293 break;
2294 case 17: buf1[0] = (buf1[0] & 0xffff00ff) | tmp1 << 8;
2295 break;
2296 case 18: buf1[0] = (buf1[0] & 0xff00ffff) | tmp1 << 16;
2297 break;
2298 case 19: buf1[0] = (buf1[0] & 0x00ffffff) | tmp1 << 24;
2299 break;
2300 case 20: buf1[1] = (buf1[1] & 0xffffff00) | tmp1 << 0;
2301 break;
2302 case 21: buf1[1] = (buf1[1] & 0xffff00ff) | tmp1 << 8;
2303 break;
2304 case 22: buf1[1] = (buf1[1] & 0xff00ffff) | tmp1 << 16;
2305 break;
2306 case 23: buf1[1] = (buf1[1] & 0x00ffffff) | tmp1 << 24;
2307 break;
2308 case 24: buf1[2] = (buf1[2] & 0xffffff00) | tmp1 << 0;
2309 break;
2310 case 25: buf1[2] = (buf1[2] & 0xffff00ff) | tmp1 << 8;
2311 break;
2312 case 26: buf1[2] = (buf1[2] & 0xff00ffff) | tmp1 << 16;
2313 break;
2314 case 27: buf1[2] = (buf1[2] & 0x00ffffff) | tmp1 << 24;
2315 break;
2316 case 28: buf1[3] = (buf1[3] & 0xffffff00) | tmp1 << 0;
2317 break;
2318 case 29: buf1[3] = (buf1[3] & 0xffff00ff) | tmp1 << 8;
2319 break;
2320 case 30: buf1[3] = (buf1[3] & 0xff00ffff) | tmp1 << 16;
2321 break;
2322 case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24;
2323 break;
2324 }
2325
2326 return in_len;
2327 }
2328
2329 static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2330 {
2331 if (p0 >= in_len) return (in_len);
2332
2333 const u32 mr = 0xff << ((p0 & 3) * 8);
2334 const u32 ml = ~mr;
2335
2336 switch (p0 / 4)
2337 {
2338 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break;
2339 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break;
2340 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break;
2341 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break;
2342 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break;
2343 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break;
2344 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break;
2345 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break;
2346 }
2347
2348 return in_len;
2349 }
2350
2351 static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2352 {
2353 if (p0 >= in_len) return (in_len);
2354
2355 const u32 mr = 0xff << ((p0 & 3) * 8);
2356 const u32 ml = ~mr;
2357
2358 switch (p0 / 4)
2359 {
2360 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break;
2361 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break;
2362 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break;
2363 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break;
2364 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break;
2365 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break;
2366 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break;
2367 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break;
2368 }
2369
2370 return in_len;
2371 }
2372
2373 static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2374 {
2375 if (p0 >= in_len) return (in_len);
2376
2377 const u32 mr = 0xff << ((p0 & 3) * 8);
2378 const u32 ml = ~mr;
2379
2380 const u32 n = 0x01010101 & mr;
2381
2382 switch (p0 / 4)
2383 {
2384 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break;
2385 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break;
2386 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break;
2387 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break;
2388 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break;
2389 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break;
2390 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break;
2391 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break;
2392 }
2393
2394 return in_len;
2395 }
2396
2397 static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2398 {
2399 if (p0 >= in_len) return (in_len);
2400
2401 const u32 mr = 0xff << ((p0 & 3) * 8);
2402 const u32 ml = ~mr;
2403
2404 const u32 n = 0x01010101 & mr;
2405
2406 switch (p0 / 4)
2407 {
2408 case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break;
2409 case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break;
2410 case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break;
2411 case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break;
2412 case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break;
2413 case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break;
2414 case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break;
2415 case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break;
2416 }
2417
2418 return in_len;
2419 }
2420
2421 static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2422 {
2423 if ((p0 + 1) >= in_len) return (in_len);
2424
2425 u32x tib40[4];
2426 u32x tib41[4];
2427
2428 lshift_block (buf0, buf1, tib40, tib41);
2429
2430 const u32 mr = 0xff << ((p0 & 3) * 8);
2431 const u32 ml = ~mr;
2432
2433 switch (p0 / 4)
2434 {
2435 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
2436 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
2437 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
2438 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
2439 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
2440 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
2441 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
2442 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
2443 }
2444
2445 return in_len;
2446 }
2447
2448 static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2449 {
2450 if (p0 == 0) return (in_len);
2451
2452 if (p0 >= in_len) return (in_len);
2453
2454 u32x tib40[4];
2455 u32x tib41[4];
2456
2457 rshift_block (buf0, buf1, tib40, tib41);
2458
2459 const u32 mr = 0xff << ((p0 & 3) * 8);
2460 const u32 ml = ~mr;
2461
2462 switch (p0 / 4)
2463 {
2464 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
2465 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
2466 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
2467 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
2468 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
2469 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
2470 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
2471 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
2472 }
2473
2474 return in_len;
2475 }
2476
2477 static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2478 {
2479 if (p0 > in_len) return (in_len);
2480
2481 if ((in_len + p0) >= 32) return (in_len);
2482
2483 u32 out_len = in_len;
2484
2485 u32x tib40[4];
2486 u32x tib41[4];
2487
2488 tib40[0] = buf0[0];
2489 tib40[1] = buf0[1];
2490 tib40[2] = buf0[2];
2491 tib40[3] = buf0[3];
2492 tib41[0] = buf1[0];
2493 tib41[1] = buf1[1];
2494 tib41[2] = buf1[2];
2495 tib41[3] = buf1[3];
2496
2497 truncate_right (tib40, tib41, p0);
2498
2499 rshift_block_N (buf0, buf1, buf0, buf1, p0);
2500
2501 buf0[0] |= tib40[0];
2502 buf0[1] |= tib40[1];
2503 buf0[2] |= tib40[2];
2504 buf0[3] |= tib40[3];
2505 buf1[0] |= tib41[0];
2506 buf1[1] |= tib41[1];
2507 buf1[2] |= tib41[2];
2508 buf1[3] |= tib41[3];
2509
2510 out_len += p0;
2511
2512 return out_len;
2513 }
2514
2515 static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2516 {
2517 if (p0 > in_len) return (in_len);
2518
2519 if ((in_len + p0) >= 32) return (in_len);
2520
2521 u32 out_len = in_len;
2522
2523 u32x tib40[4];
2524 u32x tib41[4];
2525
2526 rshift_block_N (buf0, buf1, tib40, tib41, p0);
2527
2528 truncate_left (tib40, tib41, out_len);
2529
2530 buf0[0] |= tib40[0];
2531 buf0[1] |= tib40[1];
2532 buf0[2] |= tib40[2];
2533 buf0[3] |= tib40[3];
2534 buf1[0] |= tib41[0];
2535 buf1[1] |= tib41[1];
2536 buf1[2] |= tib41[2];
2537 buf1[3] |= tib41[3];
2538
2539 out_len += p0;
2540
2541 return out_len;
2542 }
2543
2544 static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2545 {
2546 buf0[0] |= (generate_cmask (buf0[0]));
2547 buf0[1] |= (generate_cmask (buf0[1]));
2548 buf0[2] |= (generate_cmask (buf0[2]));
2549 buf0[3] |= (generate_cmask (buf0[3]));
2550 buf1[0] |= (generate_cmask (buf1[0]));
2551 buf1[1] |= (generate_cmask (buf1[1]));
2552 buf1[2] |= (generate_cmask (buf1[2]));
2553 buf1[3] |= (generate_cmask (buf1[3]));
2554
2555 #ifdef VECT_SIZE1
2556
2557 u32x tib40[4];
2558 u32x tib41[4];
2559
2560 const uchar4 tmp0 = (uchar4) (' ');
2561 const uchar4 tmp1 = (uchar4) (0x00);
2562 const uchar4 tmp2 = (uchar4) (0xff);
2563
2564 uchar4 tmp;
2565
2566 tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp);
2567 tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp);
2568 tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp);
2569 tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp);
2570 tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp);
2571 tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp);
2572 tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp);
2573 tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp);
2574
2575 rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff;
2576
2577 buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]);
2578 buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]);
2579 buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]);
2580 buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]);
2581 buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]);
2582 buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]);
2583 buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]);
2584 buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]);
2585
2586 #else
2587
2588 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
2589
2590 // The VLIW1 code above freezes VLIW4 and VLIW5 systems
2591
2592 for (u32 i = 0; i < in_len; i++)
2593 {
2594 u32x tmp0;
2595 u32x tmp1;
2596
2597 switch (i)
2598 {
2599 case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
2600 tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break;
2601 case 1: tmp0 = (buf0[0] >> 8) & 0xFF;
2602 tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break;
2603 case 2: tmp0 = (buf0[0] >> 16) & 0xFF;
2604 tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break;
2605 case 3: tmp0 = (buf0[0] >> 24) & 0xFF;
2606 tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break;
2607 case 4: tmp0 = (buf0[1] >> 0) & 0xFF;
2608 tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break;
2609 case 5: tmp0 = (buf0[1] >> 8) & 0xFF;
2610 tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break;
2611 case 6: tmp0 = (buf0[1] >> 16) & 0xFF;
2612 tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break;
2613 case 7: tmp0 = (buf0[1] >> 24) & 0xFF;
2614 tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break;
2615 case 8: tmp0 = (buf0[2] >> 0) & 0xFF;
2616 tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break;
2617 case 9: tmp0 = (buf0[2] >> 8) & 0xFF;
2618 tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break;
2619 case 10: tmp0 = (buf0[2] >> 16) & 0xFF;
2620 tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break;
2621 case 11: tmp0 = (buf0[2] >> 24) & 0xFF;
2622 tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break;
2623 case 12: tmp0 = (buf0[3] >> 0) & 0xFF;
2624 tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break;
2625 case 13: tmp0 = (buf0[3] >> 8) & 0xFF;
2626 tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break;
2627 case 14: tmp0 = (buf0[3] >> 16) & 0xFF;
2628 tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break;
2629 case 15: tmp0 = (buf0[3] >> 24) & 0xFF;
2630 tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break;
2631 case 16: tmp0 = (buf1[0] >> 0) & 0xFF;
2632 tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break;
2633 case 17: tmp0 = (buf1[0] >> 8) & 0xFF;
2634 tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break;
2635 case 18: tmp0 = (buf1[0] >> 16) & 0xFF;
2636 tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break;
2637 case 19: tmp0 = (buf1[0] >> 24) & 0xFF;
2638 tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break;
2639 case 20: tmp0 = (buf1[1] >> 0) & 0xFF;
2640 tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break;
2641 case 21: tmp0 = (buf1[1] >> 8) & 0xFF;
2642 tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break;
2643 case 22: tmp0 = (buf1[1] >> 16) & 0xFF;
2644 tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break;
2645 case 23: tmp0 = (buf1[1] >> 24) & 0xFF;
2646 tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break;
2647 case 24: tmp0 = (buf1[2] >> 0) & 0xFF;
2648 tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break;
2649 case 25: tmp0 = (buf1[2] >> 8) & 0xFF;
2650 tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break;
2651 case 26: tmp0 = (buf1[2] >> 16) & 0xFF;
2652 tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break;
2653 case 27: tmp0 = (buf1[2] >> 24) & 0xFF;
2654 tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break;
2655 case 28: tmp0 = (buf1[3] >> 0) & 0xFF;
2656 tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break;
2657 case 29: tmp0 = (buf1[3] >> 8) & 0xFF;
2658 tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break;
2659 case 30: tmp0 = (buf1[3] >> 16) & 0xFF;
2660 tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break;
2661 }
2662
2663 #ifdef VECT_SIZE2
2664 if (i < 3)
2665 {
2666 if (tmp0.s0 == ' ') buf0[0].s0 &= tmp1.s0;
2667 if (tmp0.s1 == ' ') buf0[0].s1 &= tmp1.s1;
2668 }
2669 else if (i < 7)
2670 {
2671 if (tmp0.s0 == ' ') buf0[1].s0 &= tmp1.s0;
2672 if (tmp0.s1 == ' ') buf0[1].s1 &= tmp1.s1;
2673 }
2674 else if (i < 11)
2675 {
2676 if (tmp0.s0 == ' ') buf0[2].s0 &= tmp1.s0;
2677 if (tmp0.s1 == ' ') buf0[2].s1 &= tmp1.s1;
2678 }
2679 else if (i < 15)
2680 {
2681 if (tmp0.s0 == ' ') buf0[3].s0 &= tmp1.s0;
2682 if (tmp0.s1 == ' ') buf0[3].s1 &= tmp1.s1;
2683 }
2684 else if (i < 19)
2685 {
2686 if (tmp0.s0 == ' ') buf1[0].s0 &= tmp1.s0;
2687 if (tmp0.s1 == ' ') buf1[0].s1 &= tmp1.s1;
2688 }
2689 else if (i < 23)
2690 {
2691 if (tmp0.s0 == ' ') buf1[1].s0 &= tmp1.s0;
2692 if (tmp0.s1 == ' ') buf1[1].s1 &= tmp1.s1;
2693 }
2694 else if (i < 27)
2695 {
2696 if (tmp0.s0 == ' ') buf1[2].s0 &= tmp1.s0;
2697 if (tmp0.s1 == ' ') buf1[2].s1 &= tmp1.s1;
2698 }
2699 else if (i < 31)
2700 {
2701 if (tmp0.s0 == ' ') buf1[3].s0 &= tmp1.s0;
2702 if (tmp0.s1 == ' ') buf1[3].s1 &= tmp1.s1;
2703 }
2704 #endif
2705
2706 #ifdef VECT_SIZE4
2707 if (i < 3)
2708 {
2709 if (tmp0.s0 == ' ') buf0[0].s0 &= tmp1.s0;
2710 if (tmp0.s1 == ' ') buf0[0].s1 &= tmp1.s1;
2711 if (tmp0.s2 == ' ') buf0[0].s2 &= tmp1.s2;
2712 if (tmp0.s3 == ' ') buf0[0].s3 &= tmp1.s3;
2713 }
2714 else if (i < 7)
2715 {
2716 if (tmp0.s0 == ' ') buf0[1].s0 &= tmp1.s0;
2717 if (tmp0.s1 == ' ') buf0[1].s1 &= tmp1.s1;
2718 if (tmp0.s2 == ' ') buf0[1].s2 &= tmp1.s2;
2719 if (tmp0.s3 == ' ') buf0[1].s3 &= tmp1.s3;
2720 }
2721 else if (i < 11)
2722 {
2723 if (tmp0.s0 == ' ') buf0[2].s0 &= tmp1.s0;
2724 if (tmp0.s1 == ' ') buf0[2].s1 &= tmp1.s1;
2725 if (tmp0.s2 == ' ') buf0[2].s2 &= tmp1.s2;
2726 if (tmp0.s3 == ' ') buf0[2].s3 &= tmp1.s3;
2727 }
2728 else if (i < 15)
2729 {
2730 if (tmp0.s0 == ' ') buf0[3].s0 &= tmp1.s0;
2731 if (tmp0.s1 == ' ') buf0[3].s1 &= tmp1.s1;
2732 if (tmp0.s2 == ' ') buf0[3].s2 &= tmp1.s2;
2733 if (tmp0.s3 == ' ') buf0[3].s3 &= tmp1.s3;
2734 }
2735 else if (i < 19)
2736 {
2737 if (tmp0.s0 == ' ') buf1[0].s0 &= tmp1.s0;
2738 if (tmp0.s1 == ' ') buf1[0].s1 &= tmp1.s1;
2739 if (tmp0.s2 == ' ') buf1[0].s2 &= tmp1.s2;
2740 if (tmp0.s3 == ' ') buf1[0].s3 &= tmp1.s3;
2741 }
2742 else if (i < 23)
2743 {
2744 if (tmp0.s0 == ' ') buf1[1].s0 &= tmp1.s0;
2745 if (tmp0.s1 == ' ') buf1[1].s1 &= tmp1.s1;
2746 if (tmp0.s2 == ' ') buf1[1].s2 &= tmp1.s2;
2747 if (tmp0.s3 == ' ') buf1[1].s3 &= tmp1.s3;
2748 }
2749 else if (i < 27)
2750 {
2751 if (tmp0.s0 == ' ') buf1[2].s0 &= tmp1.s0;
2752 if (tmp0.s1 == ' ') buf1[2].s1 &= tmp1.s1;
2753 if (tmp0.s2 == ' ') buf1[2].s2 &= tmp1.s2;
2754 if (tmp0.s3 == ' ') buf1[2].s3 &= tmp1.s3;
2755 }
2756 else if (i < 31)
2757 {
2758 if (tmp0.s0 == ' ') buf1[3].s0 &= tmp1.s0;
2759 if (tmp0.s1 == ' ') buf1[3].s1 &= tmp1.s1;
2760 if (tmp0.s2 == ' ') buf1[3].s2 &= tmp1.s2;
2761 if (tmp0.s3 == ' ') buf1[3].s3 &= tmp1.s3;
2762 }
2763 #endif
2764 }
2765
2766 #endif
2767
2768 return in_len;
2769 }
2770
2771 u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len)
2772 {
2773 u32 out_len = in_len;
2774
2775 switch (name)
2776 {
2777 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
2778 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break;
2779 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break;
2780 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
2781 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
2782 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
2783 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
2784 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
2785 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;
2786 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break;
2787 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break;
2788 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break;
2789 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break;
2790 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break;
2791 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break;
2792 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break;
2793 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break;
2794 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break;
2795 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break;
2796 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break;
2797 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break;
2798 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break;
2799 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break;
2800 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
2801 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
2802 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break;
2803 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break;
2804 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break;
2805 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break;
2806 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break;
2807 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break;
2808 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break;
2809 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break;
2810 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break;
2811 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break;
2812 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break;
2813 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break;
2814 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break;
2815 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break;
2816 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break;
2817 }
2818
2819 return out_len;
2820 }
2821
2822 u32 apply_rules (__global u32 *cmds, u32x buf0[4], u32x buf1[4], const u32 len)
2823 {
2824 u32 out_len = len;
2825
2826 for (u32 i = 0; cmds[i] != 0; i++)
2827 {
2828 const u32 cmd = cmds[i];
2829
2830 const u32 name = (cmd >> 0) & 0xff;
2831 const u32 p0 = (cmd >> 8) & 0xff;
2832 const u32 p1 = (cmd >> 16) & 0xff;
2833
2834 out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
2835 }
2836
2837 return out_len;
2838 }