2 * Author......: Jens Steube <jens.steube@gmail.com>
6 static u32x
generate_cmask (u32x buf
)
8 const u32x rmask
= ((buf
& 0x40404040) >> 1)
9 & ~((buf
& 0x80808080) >> 2);
11 const u32x hmask
= (buf
& 0x1f1f1f1f) + 0x05050505;
12 const u32x lmask
= (buf
& 0x1f1f1f1f) + 0x1f1f1f1f;
14 return rmask
& ~hmask
& lmask
;
/*
 * truncate_right (w0, w1, len)
 *
 * Only the signature and the mask computation are visible in this listing.
 * tmp has the low (len % 4) bytes set to 0xff, so it selects the bytes of a
 * 32-bit word that sit below byte position len.
 * NOTE(review): how tmp is applied to w0 / w1 is not shown here; presumably
 * every byte at position >= len is cleared - confirm against the full source.
 */
17 static void truncate_right (u32x w0
[4], u32x w1
[4], const u32 len
)
19 const u32 tmp
= (1 << ((len
% 4) * 8)) - 1;
/*
 * truncate_left (w0, w1, len)
 *
 * Only the signature and the mask computation are visible in this listing.
 * tmp is the complement of the truncate_right mask: the low (len % 4) bytes
 * are 0x00 and the remaining bytes are 0xff, so it selects the bytes of a
 * 32-bit word at or above byte position len.
 * NOTE(review): how tmp is applied to w0 / w1 is not shown here; presumably
 * every byte at position < len is cleared - confirm against the full source.
 */
70 static void truncate_left (u32x w0
[4], u32x w1
[4], const u32 len
)
72 const u32 tmp
= ~((1 << ((len
% 4) * 8)) - 1);
/*
 * lshift_block (in0, in1, out0, out1)
 *
 * Treats in0[0..3] followed by in1[0..3] as one 8-word block and writes the
 * block moved by one byte towards word 0 into out0 / out1.
 *
 * Two implementations of the same move are listed:
 *  - plain shifts: each output word is (word >> 8) with the low byte of the
 *    next word placed in its top byte; out1[3] gets no incoming byte, so its
 *    top byte becomes zero.
 *  - amd_bytealign (next, word, 1), with a literal 0 as the "next" word for
 *    out1[3].
 * NOTE(review): the preprocessor lines that select between the two variants
 * are not visible in this listing.
 */
123 static void lshift_block (const u32x in0
[4], const u32x in1
[4], u32x out0
[4], u32x out1
[4])
126 out0[0] = in0[0] >> 8 | in0[1] << 24;
127 out0[1] = in0[1] >> 8 | in0[2] << 24;
128 out0[2] = in0[2] >> 8 | in0[3] << 24;
129 out0[3] = in0[3] >> 8 | in1[0] << 24;
130 out1[0] = in1[0] >> 8 | in1[1] << 24;
131 out1[1] = in1[1] >> 8 | in1[2] << 24;
132 out1[2] = in1[2] >> 8 | in1[3] << 24;
133 out1[3] = in1[3] >> 8;
136 out0
[0] = amd_bytealign (in0
[1], in0
[0], 1);
137 out0
[1] = amd_bytealign (in0
[2], in0
[1], 1);
138 out0
[2] = amd_bytealign (in0
[3], in0
[2], 1);
139 out0
[3] = amd_bytealign (in1
[0], in0
[3], 1);
140 out1
[0] = amd_bytealign (in1
[1], in1
[0], 1);
141 out1
[1] = amd_bytealign (in1
[2], in1
[1], 1);
142 out1
[2] = amd_bytealign (in1
[3], in1
[2], 1);
143 out1
[3] = amd_bytealign ( 0, in1
[3], 1);
/*
 * rshift_block (in0, in1, out0, out1)
 *
 * Mirror of lshift_block: treats in0[0..3] followed by in1[0..3] as one
 * 8-word block and writes the block moved by one byte away from word 0 into
 * out0 / out1.  The low byte of out0[0] becomes zero and the top byte of
 * in1[3] is dropped.
 *
 * Words are written from out1[3] down to out0[0], so each input word is read
 * before the output word with the same index is written; callers in this
 * file pass the same arrays as input and output.
 *
 * Two implementations are listed: plain shifts, and amd_bytealign (word,
 * previous, 3) with a literal 0 as the "previous" word for out0[0].
 * NOTE(review): the preprocessor lines that select between the two variants
 * are not visible in this listing.
 */
146 static void rshift_block (const u32x in0
[4], const u32x in1
[4], u32x out0
[4], u32x out1
[4])
149 out1[3] = in1[3] << 8 | in1[2] >> 24;
150 out1[2] = in1[2] << 8 | in1[1] >> 24;
151 out1[1] = in1[1] << 8 | in1[0] >> 24;
152 out1[0] = in1[0] << 8 | in0[3] >> 24;
153 out0[3] = in0[3] << 8 | in0[2] >> 24;
154 out0[2] = in0[2] << 8 | in0[1] >> 24;
155 out0[1] = in0[1] << 8 | in0[0] >> 24;
156 out0[0] = in0[0] << 8;
159 out1
[3] = amd_bytealign (in1
[3], in1
[2], 3);
160 out1
[2] = amd_bytealign (in1
[2], in1
[1], 3);
161 out1
[1] = amd_bytealign (in1
[1], in1
[0], 3);
162 out1
[0] = amd_bytealign (in1
[0], in0
[3], 3);
163 out0
[3] = amd_bytealign (in0
[3], in0
[2], 3);
164 out0
[2] = amd_bytealign (in0
[2], in0
[1], 3);
165 out0
[1] = amd_bytealign (in0
[1], in0
[0], 3);
166 out0
[0] = amd_bytealign (in0
[0], 0, 3);
/*
 * lshift_block_N (in0, in1, out0, out1, num)
 *
 * Multi-byte version of lshift_block.  The listing shows one case label per
 * value 0..31; in each case the source word index is advanced by the case
 * value / 4 and the third amd_bytealign argument is the case value % 4
 * (cases that are a multiple of 4 copy whole words instead).  A literal 0 is
 * fed in as the word following in1[3].
 * NOTE(review): the switch header is not visible - presumably switch (num).
 * NOTE(review): several statements of most cases (the remaining whole-word
 * copies and, presumably, the zeroing of vacated output words and the
 * break statements) are not visible in this listing.
 */
169 static void lshift_block_N (const u32x in0
[4], const u32x in1
[4], u32x out0
[4], u32x out1
[4], const u32 num
)
173 case 0: out0
[0] = in0
[0];
182 case 1: out0
[0] = amd_bytealign (in0
[1], in0
[0], 1);
183 out0
[1] = amd_bytealign (in0
[2], in0
[1], 1);
184 out0
[2] = amd_bytealign (in0
[3], in0
[2], 1);
185 out0
[3] = amd_bytealign (in1
[0], in0
[3], 1);
186 out1
[0] = amd_bytealign (in1
[1], in1
[0], 1);
187 out1
[1] = amd_bytealign (in1
[2], in1
[1], 1);
188 out1
[2] = amd_bytealign (in1
[3], in1
[2], 1);
189 out1
[3] = amd_bytealign ( 0, in1
[3], 1);
191 case 2: out0
[0] = amd_bytealign (in0
[1], in0
[0], 2);
192 out0
[1] = amd_bytealign (in0
[2], in0
[1], 2);
193 out0
[2] = amd_bytealign (in0
[3], in0
[2], 2);
194 out0
[3] = amd_bytealign (in1
[0], in0
[3], 2);
195 out1
[0] = amd_bytealign (in1
[1], in1
[0], 2);
196 out1
[1] = amd_bytealign (in1
[2], in1
[1], 2);
197 out1
[2] = amd_bytealign (in1
[3], in1
[2], 2);
198 out1
[3] = amd_bytealign ( 0, in1
[3], 2);
200 case 3: out0
[0] = amd_bytealign (in0
[1], in0
[0], 3);
201 out0
[1] = amd_bytealign (in0
[2], in0
[1], 3);
202 out0
[2] = amd_bytealign (in0
[3], in0
[2], 3);
203 out0
[3] = amd_bytealign (in1
[0], in0
[3], 3);
204 out1
[0] = amd_bytealign (in1
[1], in1
[0], 3);
205 out1
[1] = amd_bytealign (in1
[2], in1
[1], 3);
206 out1
[2] = amd_bytealign (in1
[3], in1
[2], 3);
207 out1
[3] = amd_bytealign ( 0, in1
[3], 3);
209 case 4: out0
[0] = in0
[1];
218 case 5: out0
[0] = amd_bytealign (in0
[2], in0
[1], 1);
219 out0
[1] = amd_bytealign (in0
[3], in0
[2], 1);
220 out0
[2] = amd_bytealign (in1
[0], in0
[3], 1);
221 out0
[3] = amd_bytealign (in1
[1], in1
[0], 1);
222 out1
[0] = amd_bytealign (in1
[2], in1
[1], 1);
223 out1
[1] = amd_bytealign (in1
[3], in1
[2], 1);
224 out1
[2] = amd_bytealign ( 0, in1
[3], 1);
227 case 6: out0
[0] = amd_bytealign (in0
[2], in0
[1], 2);
228 out0
[1] = amd_bytealign (in0
[3], in0
[2], 2);
229 out0
[2] = amd_bytealign (in1
[0], in0
[3], 2);
230 out0
[3] = amd_bytealign (in1
[1], in1
[0], 2);
231 out1
[0] = amd_bytealign (in1
[2], in1
[1], 2);
232 out1
[1] = amd_bytealign (in1
[3], in1
[2], 2);
233 out1
[2] = amd_bytealign ( 0, in1
[3], 2);
236 case 7: out0
[0] = amd_bytealign (in0
[2], in0
[1], 3);
237 out0
[1] = amd_bytealign (in0
[3], in0
[2], 3);
238 out0
[2] = amd_bytealign (in1
[0], in0
[3], 3);
239 out0
[3] = amd_bytealign (in1
[1], in1
[0], 3);
240 out1
[0] = amd_bytealign (in1
[2], in1
[1], 3);
241 out1
[1] = amd_bytealign (in1
[3], in1
[2], 3);
242 out1
[2] = amd_bytealign ( 0, in1
[3], 3);
245 case 8: out0
[0] = in0
[2];
254 case 9: out0
[0] = amd_bytealign (in0
[3], in0
[2], 1);
255 out0
[1] = amd_bytealign (in1
[0], in0
[3], 1);
256 out0
[2] = amd_bytealign (in1
[1], in1
[0], 1);
257 out0
[3] = amd_bytealign (in1
[2], in1
[1], 1);
258 out1
[0] = amd_bytealign (in1
[3], in1
[2], 1);
259 out1
[1] = amd_bytealign ( 0, in1
[3], 1);
263 case 10: out0
[0] = amd_bytealign (in0
[3], in0
[2], 2);
264 out0
[1] = amd_bytealign (in1
[0], in0
[3], 2);
265 out0
[2] = amd_bytealign (in1
[1], in1
[0], 2);
266 out0
[3] = amd_bytealign (in1
[2], in1
[1], 2);
267 out1
[0] = amd_bytealign (in1
[3], in1
[2], 2);
268 out1
[1] = amd_bytealign ( 0, in1
[3], 2);
272 case 11: out0
[0] = amd_bytealign (in0
[3], in0
[2], 3);
273 out0
[1] = amd_bytealign (in1
[0], in0
[3], 3);
274 out0
[2] = amd_bytealign (in1
[1], in1
[0], 3);
275 out0
[3] = amd_bytealign (in1
[2], in1
[1], 3);
276 out1
[0] = amd_bytealign (in1
[3], in1
[2], 3);
277 out1
[1] = amd_bytealign ( 0, in1
[3], 3);
281 case 12: out0
[0] = in0
[3];
290 case 13: out0
[0] = amd_bytealign (in1
[0], in0
[3], 1);
291 out0
[1] = amd_bytealign (in1
[1], in1
[0], 1);
292 out0
[2] = amd_bytealign (in1
[2], in1
[1], 1);
293 out0
[3] = amd_bytealign (in1
[3], in1
[2], 1);
294 out1
[0] = amd_bytealign ( 0, in1
[3], 1);
299 case 14: out0
[0] = amd_bytealign (in1
[0], in0
[3], 2);
300 out0
[1] = amd_bytealign (in1
[1], in1
[0], 2);
301 out0
[2] = amd_bytealign (in1
[2], in1
[1], 2);
302 out0
[3] = amd_bytealign (in1
[3], in1
[2], 2);
303 out1
[0] = amd_bytealign ( 0, in1
[3], 2);
308 case 15: out0
[0] = amd_bytealign (in1
[0], in0
[3], 3);
309 out0
[1] = amd_bytealign (in1
[1], in1
[0], 3);
310 out0
[2] = amd_bytealign (in1
[2], in1
[1], 3);
311 out0
[3] = amd_bytealign (in1
[3], in1
[2], 3);
312 out1
[0] = amd_bytealign ( 0, in1
[3], 3);
317 case 16: out0
[0] = in1
[0];
326 case 17: out0
[0] = amd_bytealign (in1
[1], in1
[0], 1);
327 out0
[1] = amd_bytealign (in1
[2], in1
[1], 1);
328 out0
[2] = amd_bytealign (in1
[3], in1
[2], 1);
329 out0
[3] = amd_bytealign ( 0, in1
[3], 1);
335 case 18: out0
[0] = amd_bytealign (in1
[1], in1
[0], 2);
336 out0
[1] = amd_bytealign (in1
[2], in1
[1], 2);
337 out0
[2] = amd_bytealign (in1
[3], in1
[2], 2);
338 out0
[3] = amd_bytealign ( 0, in1
[3], 2);
344 case 19: out0
[0] = amd_bytealign (in1
[1], in1
[0], 3);
345 out0
[1] = amd_bytealign (in1
[2], in1
[1], 3);
346 out0
[2] = amd_bytealign (in1
[3], in1
[2], 3);
347 out0
[3] = amd_bytealign ( 0, in1
[3], 3);
353 case 20: out0
[0] = in1
[1];
362 case 21: out0
[0] = amd_bytealign (in1
[2], in1
[1], 1);
363 out0
[1] = amd_bytealign (in1
[3], in1
[2], 1);
364 out0
[2] = amd_bytealign ( 0, in1
[3], 1);
371 case 22: out0
[0] = amd_bytealign (in1
[2], in1
[1], 2);
372 out0
[1] = amd_bytealign (in1
[3], in1
[2], 2);
373 out0
[2] = amd_bytealign ( 0, in1
[3], 2);
380 case 23: out0
[0] = amd_bytealign (in1
[2], in1
[1], 3);
381 out0
[1] = amd_bytealign (in1
[3], in1
[2], 3);
382 out0
[2] = amd_bytealign ( 0, in1
[3], 3);
389 case 24: out0
[0] = in1
[2];
398 case 25: out0
[0] = amd_bytealign (in1
[3], in1
[2], 1);
399 out0
[1] = amd_bytealign ( 0, in1
[3], 1);
407 case 26: out0
[0] = amd_bytealign (in1
[3], in1
[2], 2);
408 out0
[1] = amd_bytealign ( 0, in1
[3], 2);
416 case 27: out0
[0] = amd_bytealign (in1
[3], in1
[2], 3);
417 out0
[1] = amd_bytealign ( 0, in1
[3], 3);
425 case 28: out0
[0] = in1
[3];
434 case 29: out0
[0] = amd_bytealign ( 0, in1
[3], 1);
443 case 30: out0
[0] = amd_bytealign ( 0, in1
[3], 2);
452 case 31: out0
[0] = amd_bytealign ( 0, in1
[3], 3);
/*
 * rshift_block_N (in0, in1, out0, out1, num)
 *
 * Multi-byte version of rshift_block.  The listing shows one case label per
 * value 0..31; in each case the source word index is lowered by the case
 * value / 4 and the third amd_bytealign argument is 4 - (case value % 4)
 * (cases that are a multiple of 4 copy whole words instead).  A literal 0 is
 * fed in as the word preceding in0[0].  Within a case, words are written
 * from out1[3] downwards.
 * NOTE(review): the switch header is not visible - presumably switch (num).
 * NOTE(review): several statements of most cases (the remaining whole-word
 * copies and, presumably, the zeroing of vacated output words and the
 * break statements) are not visible in this listing.
 */
464 static void rshift_block_N (const u32x in0
[4], const u32x in1
[4], u32x out0
[4], u32x out1
[4], const u32 num
)
468 case 0: out1
[3] = in1
[3];
477 case 1: out1
[3] = amd_bytealign (in1
[3], in1
[2], 3);
478 out1
[2] = amd_bytealign (in1
[2], in1
[1], 3);
479 out1
[1] = amd_bytealign (in1
[1], in1
[0], 3);
480 out1
[0] = amd_bytealign (in1
[0], in0
[3], 3);
481 out0
[3] = amd_bytealign (in0
[3], in0
[2], 3);
482 out0
[2] = amd_bytealign (in0
[2], in0
[1], 3);
483 out0
[1] = amd_bytealign (in0
[1], in0
[0], 3);
484 out0
[0] = amd_bytealign (in0
[0], 0, 3);
486 case 2: out1
[3] = amd_bytealign (in1
[3], in1
[2], 2);
487 out1
[2] = amd_bytealign (in1
[2], in1
[1], 2);
488 out1
[1] = amd_bytealign (in1
[1], in1
[0], 2);
489 out1
[0] = amd_bytealign (in1
[0], in0
[3], 2);
490 out0
[3] = amd_bytealign (in0
[3], in0
[2], 2);
491 out0
[2] = amd_bytealign (in0
[2], in0
[1], 2);
492 out0
[1] = amd_bytealign (in0
[1], in0
[0], 2);
493 out0
[0] = amd_bytealign (in0
[0], 0, 2);
495 case 3: out1
[3] = amd_bytealign (in1
[3], in1
[2], 1);
496 out1
[2] = amd_bytealign (in1
[2], in1
[1], 1);
497 out1
[1] = amd_bytealign (in1
[1], in1
[0], 1);
498 out1
[0] = amd_bytealign (in1
[0], in0
[3], 1);
499 out0
[3] = amd_bytealign (in0
[3], in0
[2], 1);
500 out0
[2] = amd_bytealign (in0
[2], in0
[1], 1);
501 out0
[1] = amd_bytealign (in0
[1], in0
[0], 1);
502 out0
[0] = amd_bytealign (in0
[0], 0, 1);
504 case 4: out1
[3] = in1
[2];
513 case 5: out1
[3] = amd_bytealign (in1
[2], in1
[1], 3);
514 out1
[2] = amd_bytealign (in1
[1], in1
[0], 3);
515 out1
[1] = amd_bytealign (in1
[0], in0
[3], 3);
516 out1
[0] = amd_bytealign (in0
[3], in0
[2], 3);
517 out0
[3] = amd_bytealign (in0
[2], in0
[1], 3);
518 out0
[2] = amd_bytealign (in0
[1], in0
[0], 3);
519 out0
[1] = amd_bytealign (in0
[0], 0, 3);
522 case 6: out1
[3] = amd_bytealign (in1
[2], in1
[1], 2);
523 out1
[2] = amd_bytealign (in1
[1], in1
[0], 2);
524 out1
[1] = amd_bytealign (in1
[0], in0
[3], 2);
525 out1
[0] = amd_bytealign (in0
[3], in0
[2], 2);
526 out0
[3] = amd_bytealign (in0
[2], in0
[1], 2);
527 out0
[2] = amd_bytealign (in0
[1], in0
[0], 2);
528 out0
[1] = amd_bytealign (in0
[0], 0, 2);
531 case 7: out1
[3] = amd_bytealign (in1
[2], in1
[1], 1);
532 out1
[2] = amd_bytealign (in1
[1], in1
[0], 1);
533 out1
[1] = amd_bytealign (in1
[0], in0
[3], 1);
534 out1
[0] = amd_bytealign (in0
[3], in0
[2], 1);
535 out0
[3] = amd_bytealign (in0
[2], in0
[1], 1);
536 out0
[2] = amd_bytealign (in0
[1], in0
[0], 1);
537 out0
[1] = amd_bytealign (in0
[0], 0, 1);
540 case 8: out1
[3] = in1
[1];
549 case 9: out1
[3] = amd_bytealign (in1
[1], in1
[0], 3);
550 out1
[2] = amd_bytealign (in1
[0], in0
[3], 3);
551 out1
[1] = amd_bytealign (in0
[3], in0
[2], 3);
552 out1
[0] = amd_bytealign (in0
[2], in0
[1], 3);
553 out0
[3] = amd_bytealign (in0
[1], in0
[0], 3);
554 out0
[2] = amd_bytealign (in0
[0], 0, 3);
558 case 10: out1
[3] = amd_bytealign (in1
[1], in1
[0], 2);
559 out1
[2] = amd_bytealign (in1
[0], in0
[3], 2);
560 out1
[1] = amd_bytealign (in0
[3], in0
[2], 2);
561 out1
[0] = amd_bytealign (in0
[2], in0
[1], 2);
562 out0
[3] = amd_bytealign (in0
[1], in0
[0], 2);
563 out0
[2] = amd_bytealign (in0
[0], 0, 2);
567 case 11: out1
[3] = amd_bytealign (in1
[1], in1
[0], 1);
568 out1
[2] = amd_bytealign (in1
[0], in0
[3], 1);
569 out1
[1] = amd_bytealign (in0
[3], in0
[2], 1);
570 out1
[0] = amd_bytealign (in0
[2], in0
[1], 1);
571 out0
[3] = amd_bytealign (in0
[1], in0
[0], 1);
572 out0
[2] = amd_bytealign (in0
[0], 0, 1);
576 case 12: out1
[3] = in1
[0];
585 case 13: out1
[3] = amd_bytealign (in1
[0], in0
[3], 3);
586 out1
[2] = amd_bytealign (in0
[3], in0
[2], 3);
587 out1
[1] = amd_bytealign (in0
[2], in0
[1], 3);
588 out1
[0] = amd_bytealign (in0
[1], in0
[0], 3);
589 out0
[3] = amd_bytealign (in0
[0], 0, 3);
594 case 14: out1
[3] = amd_bytealign (in1
[0], in0
[3], 2);
595 out1
[2] = amd_bytealign (in0
[3], in0
[2], 2);
596 out1
[1] = amd_bytealign (in0
[2], in0
[1], 2);
597 out1
[0] = amd_bytealign (in0
[1], in0
[0], 2);
598 out0
[3] = amd_bytealign (in0
[0], 0, 2);
603 case 15: out1
[3] = amd_bytealign (in1
[0], in0
[3], 1);
604 out1
[2] = amd_bytealign (in0
[3], in0
[2], 1);
605 out1
[1] = amd_bytealign (in0
[2], in0
[1], 1);
606 out1
[0] = amd_bytealign (in0
[1], in0
[0], 1);
607 out0
[3] = amd_bytealign (in0
[0], 0, 1);
612 case 16: out1
[3] = in0
[3];
621 case 17: out1
[3] = amd_bytealign (in0
[3], in0
[2], 3);
622 out1
[2] = amd_bytealign (in0
[2], in0
[1], 3);
623 out1
[1] = amd_bytealign (in0
[1], in0
[0], 3);
624 out1
[0] = amd_bytealign (in0
[0], 0, 3);
630 case 18: out1
[3] = amd_bytealign (in0
[3], in0
[2], 2);
631 out1
[2] = amd_bytealign (in0
[2], in0
[1], 2);
632 out1
[1] = amd_bytealign (in0
[1], in0
[0], 2);
633 out1
[0] = amd_bytealign (in0
[0], 0, 2);
639 case 19: out1
[3] = amd_bytealign (in0
[3], in0
[2], 1);
640 out1
[2] = amd_bytealign (in0
[2], in0
[1], 1);
641 out1
[1] = amd_bytealign (in0
[1], in0
[0], 1);
642 out1
[0] = amd_bytealign (in0
[0], 0, 1);
648 case 20: out1
[3] = in0
[2];
657 case 21: out1
[3] = amd_bytealign (in0
[2], in0
[1], 3);
658 out1
[2] = amd_bytealign (in0
[1], in0
[0], 3);
659 out1
[1] = amd_bytealign (in0
[0], 0, 3);
666 case 22: out1
[3] = amd_bytealign (in0
[2], in0
[1], 2);
667 out1
[2] = amd_bytealign (in0
[1], in0
[0], 2);
668 out1
[1] = amd_bytealign (in0
[0], 0, 2);
675 case 23: out1
[3] = amd_bytealign (in0
[2], in0
[1], 1);
676 out1
[2] = amd_bytealign (in0
[1], in0
[0], 1);
677 out1
[1] = amd_bytealign (in0
[0], 0, 1);
684 case 24: out1
[3] = in0
[1];
693 case 25: out1
[3] = amd_bytealign (in0
[1], in0
[0], 3);
694 out1
[2] = amd_bytealign (in0
[0], 0, 3);
702 case 26: out1
[3] = amd_bytealign (in0
[1], in0
[0], 2);
703 out1
[2] = amd_bytealign (in0
[0], 0, 2);
711 case 27: out1
[3] = amd_bytealign (in0
[1], in0
[0], 1);
712 out1
[2] = amd_bytealign (in0
[0], 0, 1);
720 case 28: out1
[3] = in0
[0];
729 case 29: out1
[3] = amd_bytealign (in0
[0], 0, 3);
738 case 30: out1
[3] = amd_bytealign (in0
[0], 0, 2);
747 case 31: out1
[3] = amd_bytealign (in0
[0], 0, 1);
/*
 * append_block1 (offset, dst0, dst1, src_r0)
 *
 * Two groups of case labels are visible:
 *  - cases 0..3 pre-shift src_r0 by 0 / 8 / 16 / 24 bits into tmp[0] and put
 *    the bits pushed out of the word into tmp[1];
 *  - cases 0..7 OR tmp[0] into one word of the block (dst0[0..3] for 0..3,
 *    dst1[0..3] for 4..7).
 * NOTE(review): the declaration of tmp, both switch headers (presumably
 * offset & 3 and offset / 4) and any statements that consume tmp[1] are not
 * visible in this listing.
 */
759 static void append_block1 (const u32 offset
, u32x dst0
[4], u32x dst1
[4], const u32x src_r0
)
765 case 0: tmp
[0] = src_r0
;
768 case 1: tmp
[0] = src_r0
<< 8;
769 tmp
[1] = src_r0
>> 24;
771 case 2: tmp
[0] = src_r0
<< 16;
772 tmp
[1] = src_r0
>> 16;
774 case 3: tmp
[0] = src_r0
<< 24;
775 tmp
[1] = src_r0
>> 8;
781 case 0: dst0
[0] |= tmp
[0];
784 case 1: dst0
[1] |= tmp
[0];
787 case 2: dst0
[2] |= tmp
[0];
790 case 3: dst0
[3] |= tmp
[0];
793 case 4: dst1
[0] |= tmp
[0];
796 case 5: dst1
[1] |= tmp
[0];
799 case 6: dst1
[2] |= tmp
[0];
802 case 7: dst1
[3] |= tmp
[0];
/*
 * append_block8 (offset, dst0, dst1, src_l0, src_l1, src_r0, src_r1)
 *
 * The visible statements assign words of dst0 / dst1 from the right-hand
 * source block (src_r0 followed by src_r1), either as whole words or as two
 * neighbouring words combined with amd_bytealign (next, word, 3 / 2 / 1).
 * Successive groups start one destination word later and therefore use one
 * source word fewer.
 * NOTE(review): the switch header and case labels (presumably keyed on
 * offset), the statement that writes the first destination word of each
 * group, and every use of src_l0 / src_l1 are not visible in this listing.
 */
807 static void append_block8 (const u32 offset
, u32x dst0
[4], u32x dst1
[4], const u32x src_l0
[4], const u32x src_l1
[4], const u32x src_r0
[4], const u32x src_r1
[4])
825 dst0
[1] = amd_bytealign (src_r0
[1], src_r0
[0], 3);
826 dst0
[2] = amd_bytealign (src_r0
[2], src_r0
[1], 3);
827 dst0
[3] = amd_bytealign (src_r0
[3], src_r0
[2], 3);
828 dst1
[0] = amd_bytealign (src_r1
[0], src_r0
[3], 3);
829 dst1
[1] = amd_bytealign (src_r1
[1], src_r1
[0], 3);
830 dst1
[2] = amd_bytealign (src_r1
[2], src_r1
[1], 3);
831 dst1
[3] = amd_bytealign (src_r1
[3], src_r1
[2], 3);
837 dst0
[1] = amd_bytealign (src_r0
[1], src_r0
[0], 2);
838 dst0
[2] = amd_bytealign (src_r0
[2], src_r0
[1], 2);
839 dst0
[3] = amd_bytealign (src_r0
[3], src_r0
[2], 2);
840 dst1
[0] = amd_bytealign (src_r1
[0], src_r0
[3], 2);
841 dst1
[1] = amd_bytealign (src_r1
[1], src_r1
[0], 2);
842 dst1
[2] = amd_bytealign (src_r1
[2], src_r1
[1], 2);
843 dst1
[3] = amd_bytealign (src_r1
[3], src_r1
[2], 2);
849 dst0
[1] = amd_bytealign (src_r0
[1], src_r0
[0], 1);
850 dst0
[2] = amd_bytealign (src_r0
[2], src_r0
[1], 1);
851 dst0
[3] = amd_bytealign (src_r0
[3], src_r0
[2], 1);
852 dst1
[0] = amd_bytealign (src_r1
[0], src_r0
[3], 1);
853 dst1
[1] = amd_bytealign (src_r1
[1], src_r1
[0], 1);
854 dst1
[2] = amd_bytealign (src_r1
[2], src_r1
[1], 1);
855 dst1
[3] = amd_bytealign (src_r1
[3], src_r1
[2], 1);
871 dst0
[2] = amd_bytealign (src_r0
[1], src_r0
[0], 3);
872 dst0
[3] = amd_bytealign (src_r0
[2], src_r0
[1], 3);
873 dst1
[0] = amd_bytealign (src_r0
[3], src_r0
[2], 3);
874 dst1
[1] = amd_bytealign (src_r1
[0], src_r0
[3], 3);
875 dst1
[2] = amd_bytealign (src_r1
[1], src_r1
[0], 3);
876 dst1
[3] = amd_bytealign (src_r1
[2], src_r1
[1], 3);
882 dst0
[2] = amd_bytealign (src_r0
[1], src_r0
[0], 2);
883 dst0
[3] = amd_bytealign (src_r0
[2], src_r0
[1], 2);
884 dst1
[0] = amd_bytealign (src_r0
[3], src_r0
[2], 2);
885 dst1
[1] = amd_bytealign (src_r1
[0], src_r0
[3], 2);
886 dst1
[2] = amd_bytealign (src_r1
[1], src_r1
[0], 2);
887 dst1
[3] = amd_bytealign (src_r1
[2], src_r1
[1], 2);
893 dst0
[2] = amd_bytealign (src_r0
[1], src_r0
[0], 1);
894 dst0
[3] = amd_bytealign (src_r0
[2], src_r0
[1], 1);
895 dst1
[0] = amd_bytealign (src_r0
[3], src_r0
[2], 1);
896 dst1
[1] = amd_bytealign (src_r1
[0], src_r0
[3], 1);
897 dst1
[2] = amd_bytealign (src_r1
[1], src_r1
[0], 1);
898 dst1
[3] = amd_bytealign (src_r1
[2], src_r1
[1], 1);
913 dst0
[3] = amd_bytealign (src_r0
[1], src_r0
[0], 3);
914 dst1
[0] = amd_bytealign (src_r0
[2], src_r0
[1], 3);
915 dst1
[1] = amd_bytealign (src_r0
[3], src_r0
[2], 3);
916 dst1
[2] = amd_bytealign (src_r1
[0], src_r0
[3], 3);
917 dst1
[3] = amd_bytealign (src_r1
[1], src_r1
[0], 3);
923 dst0
[3] = amd_bytealign (src_r0
[1], src_r0
[0], 2);
924 dst1
[0] = amd_bytealign (src_r0
[2], src_r0
[1], 2);
925 dst1
[1] = amd_bytealign (src_r0
[3], src_r0
[2], 2);
926 dst1
[2] = amd_bytealign (src_r1
[0], src_r0
[3], 2);
927 dst1
[3] = amd_bytealign (src_r1
[1], src_r1
[0], 2);
933 dst0
[3] = amd_bytealign (src_r0
[1], src_r0
[0], 1);
934 dst1
[0] = amd_bytealign (src_r0
[2], src_r0
[1], 1);
935 dst1
[1] = amd_bytealign (src_r0
[3], src_r0
[2], 1);
936 dst1
[2] = amd_bytealign (src_r1
[0], src_r0
[3], 1);
937 dst1
[3] = amd_bytealign (src_r1
[1], src_r1
[0], 1);
951 dst1
[0] = amd_bytealign (src_r0
[1], src_r0
[0], 3);
952 dst1
[1] = amd_bytealign (src_r0
[2], src_r0
[1], 3);
953 dst1
[2] = amd_bytealign (src_r0
[3], src_r0
[2], 3);
954 dst1
[3] = amd_bytealign (src_r1
[0], src_r0
[3], 3);
960 dst1
[0] = amd_bytealign (src_r0
[1], src_r0
[0], 2);
961 dst1
[1] = amd_bytealign (src_r0
[2], src_r0
[1], 2);
962 dst1
[2] = amd_bytealign (src_r0
[3], src_r0
[2], 2);
963 dst1
[3] = amd_bytealign (src_r1
[0], src_r0
[3], 2);
969 dst1
[0] = amd_bytealign (src_r0
[1], src_r0
[0], 1);
970 dst1
[1] = amd_bytealign (src_r0
[2], src_r0
[1], 1);
971 dst1
[2] = amd_bytealign (src_r0
[3], src_r0
[2], 1);
972 dst1
[3] = amd_bytealign (src_r1
[0], src_r0
[3], 1);
985 dst1
[1] = amd_bytealign (src_r0
[1], src_r0
[0], 3);
986 dst1
[2] = amd_bytealign (src_r0
[2], src_r0
[1], 3);
987 dst1
[3] = amd_bytealign (src_r0
[3], src_r0
[2], 3);
993 dst1
[1] = amd_bytealign (src_r0
[1], src_r0
[0], 2);
994 dst1
[2] = amd_bytealign (src_r0
[2], src_r0
[1], 2);
995 dst1
[3] = amd_bytealign (src_r0
[3], src_r0
[2], 2);
1001 dst1
[1] = amd_bytealign (src_r0
[1], src_r0
[0], 1);
1002 dst1
[2] = amd_bytealign (src_r0
[2], src_r0
[1], 1);
1003 dst1
[3] = amd_bytealign (src_r0
[3], src_r0
[2], 1);
1007 dst1
[1] = src_r0
[0];
1008 dst1
[2] = src_r0
[1];
1009 dst1
[3] = src_r0
[2];
1015 dst1
[2] = amd_bytealign (src_r0
[1], src_r0
[0], 3);
1016 dst1
[3] = amd_bytealign (src_r0
[2], src_r0
[1], 3);
1022 dst1
[2] = amd_bytealign (src_r0
[1], src_r0
[0], 2);
1023 dst1
[3] = amd_bytealign (src_r0
[2], src_r0
[1], 2);
1029 dst1
[2] = amd_bytealign (src_r0
[1], src_r0
[0], 1);
1030 dst1
[3] = amd_bytealign (src_r0
[2], src_r0
[1], 1);
1034 dst1
[2] = src_r0
[0];
1035 dst1
[3] = src_r0
[1];
1041 dst1
[3] = amd_bytealign (src_r0
[1], src_r0
[0], 3);
1047 dst1
[3] = amd_bytealign (src_r0
[1], src_r0
[0], 2);
1053 dst1
[3] = amd_bytealign (src_r0
[1], src_r0
[0], 1);
1057 dst1
[3] = src_r0
[0];
/*
 * reverse_block (in0, in1, out0, out1, len)
 *
 * Visible steps:
 *  1. rshift_block_N moves the input block by (32 - len) bytes into
 *     out0 / out1.
 *  2. Every output word is then set to swap_workaround () of the word with
 *     the same index in the temporaries tib40 / tib41.
 * NOTE(review): the declaration and filling of tib40 / tib41 are not visible
 * in this listing; presumably they receive the shifted block with the word
 * order reversed, and swap_workaround presumably reverses the bytes inside a
 * word - confirm both.
 */
1077 static void reverse_block (u32x in0
[4], u32x in1
[4], u32x out0
[4], u32x out1
[4], const u32 len
)
1079 rshift_block_N (in0
, in1
, out0
, out1
, 32 - len
);
1093 out0
[0] = swap_workaround (tib40
[0]);
1094 out0
[1] = swap_workaround (tib40
[1]);
1095 out0
[2] = swap_workaround (tib40
[2]);
1096 out0
[3] = swap_workaround (tib40
[3]);
1097 out1
[0] = swap_workaround (tib41
[0]);
1098 out1
[1] = swap_workaround (tib41
[1]);
1099 out1
[2] = swap_workaround (tib41
[2]);
1100 out1
[3] = swap_workaround (tib41
[3]);
/*
 * rule_op_mangle_lrest: lowercase the whole block.
 *
 * ORs generate_cmask () of each of the eight words back into that word, which
 * sets bit 0x20 in every byte that holds an ASCII letter and leaves all other
 * bytes unchanged.  p0, p1 and in_len are not used by the visible statements.
 * NOTE(review): the return statement is not visible in this listing.
 */
1103 static u32
rule_op_mangle_lrest (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1105 buf0
[0] |= (generate_cmask (buf0
[0]));
1106 buf0
[1] |= (generate_cmask (buf0
[1]));
1107 buf0
[2] |= (generate_cmask (buf0
[2]));
1108 buf0
[3] |= (generate_cmask (buf0
[3]));
1109 buf1
[0] |= (generate_cmask (buf1
[0]));
1110 buf1
[1] |= (generate_cmask (buf1
[1]));
1111 buf1
[2] |= (generate_cmask (buf1
[2]));
1112 buf1
[3] |= (generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_urest: uppercase the whole block.
 *
 * Clears, in each of the eight words, the bits reported by generate_cmask (),
 * i.e. bit 0x20 of every byte that holds an ASCII letter; all other bytes are
 * unchanged.  p0, p1 and in_len are not used by the visible statements.
 * NOTE(review): the return statement is not visible in this listing.
 */
1117 static u32
rule_op_mangle_urest (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1119 buf0
[0] &= ~(generate_cmask (buf0
[0]));
1120 buf0
[1] &= ~(generate_cmask (buf0
[1]));
1121 buf0
[2] &= ~(generate_cmask (buf0
[2]));
1122 buf0
[3] &= ~(generate_cmask (buf0
[3]));
1123 buf1
[0] &= ~(generate_cmask (buf1
[0]));
1124 buf1
[1] &= ~(generate_cmask (buf1
[1]));
1125 buf1
[2] &= ~(generate_cmask (buf1
[2]));
1126 buf1
[3] &= ~(generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_lrest_ufirst: lowercase everything, then uppercase the
 * first character.
 *
 * Runs rule_op_mangle_lrest on the block, then clears bit 0x20 in the lowest
 * byte of buf0[0], but only when generate_cmask () reports that byte as an
 * ASCII letter.
 * NOTE(review): the return statement is not visible in this listing.
 */
1131 static u32
rule_op_mangle_lrest_ufirst (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1133 rule_op_mangle_lrest (p0
, p1
, buf0
, buf1
, in_len
);
1135 buf0
[0] &= ~(0x00000020 & generate_cmask (buf0
[0]));
/*
 * rule_op_mangle_urest_lfirst: uppercase everything, then lowercase the
 * first character.
 *
 * Runs rule_op_mangle_urest on the block, then sets bit 0x20 in the lowest
 * byte of buf0[0], but only when generate_cmask () reports that byte as an
 * ASCII letter.
 * NOTE(review): the return statement is not visible in this listing.
 */
1140 static u32
rule_op_mangle_urest_lfirst (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1142 rule_op_mangle_urest (p0
, p1
, buf0
, buf1
, in_len
);
1144 buf0
[0] |= (0x00000020 & generate_cmask (buf0
[0]));
/*
 * rule_op_mangle_trest: toggle the case of the whole block.
 *
 * XORs generate_cmask () of each of the eight words into that word, which
 * flips bit 0x20 of every byte that holds an ASCII letter and leaves all
 * other bytes unchanged.  p0, p1 and in_len are not used by the visible
 * statements.
 * NOTE(review): the return statement is not visible in this listing.
 */
1149 static u32
rule_op_mangle_trest (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1151 buf0
[0] ^= (generate_cmask (buf0
[0]));
1152 buf0
[1] ^= (generate_cmask (buf0
[1]));
1153 buf0
[2] ^= (generate_cmask (buf0
[2]));
1154 buf0
[3] ^= (generate_cmask (buf0
[3]));
1155 buf1
[0] ^= (generate_cmask (buf1
[0]));
1156 buf1
[1] ^= (generate_cmask (buf1
[1]));
1157 buf1
[2] ^= (generate_cmask (buf1
[2]));
1158 buf1
[3] ^= (generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_toggle_at: toggle the case of the character at position p0.
 *
 * Returns in_len unchanged when p0 is not inside the word (p0 >= in_len).
 * tmp is the case bit 0x20 moved into byte lane (p0 & 3); each case XORs it
 * into one word of the block, masked by generate_cmask () so that only an
 * ASCII letter is affected.
 * NOTE(review): the switch header (presumably switch (p0 / 4)) and the final
 * return statement are not visible in this listing.
 */
1163 static u32
rule_op_mangle_toggle_at (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1165 if (p0
>= in_len
) return (in_len
);
1167 const u32 tmp
= 0x20 << ((p0
& 3) * 8);
1171 case 0: buf0
[0] ^= (tmp
& generate_cmask (buf0
[0])); break;
1172 case 1: buf0
[1] ^= (tmp
& generate_cmask (buf0
[1])); break;
1173 case 2: buf0
[2] ^= (tmp
& generate_cmask (buf0
[2])); break;
1174 case 3: buf0
[3] ^= (tmp
& generate_cmask (buf0
[3])); break;
1175 case 4: buf1
[0] ^= (tmp
& generate_cmask (buf1
[0])); break;
1176 case 5: buf1
[1] ^= (tmp
& generate_cmask (buf1
[1])); break;
1177 case 6: buf1
[2] ^= (tmp
& generate_cmask (buf1
[2])); break;
1178 case 7: buf1
[3] ^= (tmp
& generate_cmask (buf1
[3])); break;
/*
 * rule_op_mangle_reverse: reverse the word in place.
 *
 * Calls reverse_block with buf0 / buf1 as both input and output and in_len
 * as the length.  p0 and p1 are not used by the visible statement.
 * NOTE(review): the return statement is not visible in this listing.
 */
1184 static u32
rule_op_mangle_reverse (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1186 reverse_block (buf0
, buf1
, buf0
, buf1
, in_len
);
/*
 * rule_op_mangle_dupeword: append a copy of the word to itself.
 *
 * Returns in_len unchanged when the doubled length would reach 32 bytes.
 * Otherwise append_block8 is called at offset out_len (= in_len) with the
 * block itself as destination and left source and tib40 / tib41 as the right
 * source.
 * NOTE(review): the declaration and filling of tib40 / tib41 (presumably a
 * copy of buf0 / buf1), the update of out_len and the final return are not
 * visible in this listing.
 */
1191 static u32
rule_op_mangle_dupeword (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1193 if ((in_len
+ in_len
) >= 32) return (in_len
);
1195 u32 out_len
= in_len
;
1209 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, tib40
, tib41
);
/*
 * rule_op_mangle_dupeword_times: append p0 further copies of the word.
 *
 * Returns in_len unchanged when in_len * (p0 + 1) would reach 32 bytes.
 * Otherwise the loop runs p0 times and calls append_block8 at offset out_len
 * with tib40 / tib41 as the right source on every iteration.
 * NOTE(review): the declaration and filling of tib40 / tib41, the per-
 * iteration update of out_len (presumably out_len += in_len) and the final
 * return are not visible in this listing.
 */
1216 static u32
rule_op_mangle_dupeword_times (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1218 if (((in_len
* p0
) + in_len
) >= 32) return (in_len
);
1220 u32 out_len
= in_len
;
1234 for (u32 i
= 0; i
< p0
; i
++)
1236 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, tib40
, tib41
);
/*
 * rule_op_mangle_reflect: append the reversed word to itself.
 *
 * Returns in_len unchanged when the doubled length would reach 32 bytes.
 * Otherwise reverse_block writes the reversed word into tib40 / tib41, and
 * append_block8 appends that at offset out_len (= in_len).
 * NOTE(review): the declaration of tib40 / tib41, the update of out_len and
 * the final return are not visible in this listing.
 */
1244 static u32
rule_op_mangle_reflect (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1246 if ((in_len
+ in_len
) >= 32) return (in_len
);
1248 u32 out_len
= in_len
;
1253 reverse_block (buf0
, buf1
, tib40
, tib41
, out_len
);
1255 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, tib40
, tib41
);
/*
 * rule_op_mangle_append: append the character p0 to the word.
 *
 * Returns in_len unchanged when one more byte would reach 32 bytes.
 * Otherwise append_block1 is called with p0 as the source at offset out_len
 * (= in_len).  p1 is not used by the visible statements.
 * NOTE(review): the update of out_len and the final return are not visible
 * in this listing.
 */
1262 static u32
rule_op_mangle_append (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1264 if ((in_len
+ 1) >= 32) return (in_len
);
1266 u32 out_len
= in_len
;
1268 append_block1 (out_len
, buf0
, buf1
, p0
);
/*
 * rule_op_mangle_prepend: insert the character p0 in front of the word.
 *
 * Returns in_len unchanged when one more byte would reach 32 bytes.
 * Otherwise the block is moved up by one byte in place with rshift_block,
 * which leaves the lowest byte of buf0[0] zero, and p0 is OR-ed into
 * buf0[0].  p1 is not used by the visible statements.
 * NOTE(review): the update of out_len and the final return are not visible
 * in this listing.
 */
1275 static u32
rule_op_mangle_prepend (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1277 if ((in_len
+ 1) >= 32) return (in_len
);
1279 u32 out_len
= in_len
;
1281 rshift_block (buf0
, buf1
, buf0
, buf1
);
1283 buf0
[0] = buf0
[0] | p0
;
/*
 * rule_op_mangle_rotate_left: move the first character to the end.
 *
 * Returns in_len unchanged for an empty word.  Otherwise:
 *  - in_len1 is the index of the last character and sh its bit offset
 *    inside its word;
 *  - tmp is the first character (lowest byte of buf0[0]) moved to that bit
 *    offset;
 *  - lshift_block moves the block down by one byte in place;
 *  - the switch ORs tmp into the word that holds index in_len1.
 * p0 and p1 are not used by the visible statements.
 * NOTE(review): the final return statement is not visible in this listing.
 */
1290 static u32
rule_op_mangle_rotate_left (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1292 if (in_len
== 0) return (in_len
);
1294 const u32 in_len1
= in_len
- 1;
1296 const u32 sh
= (in_len1
& 3) * 8;
1298 const u32x tmp
= (buf0
[0] & 0xff) << sh
;
1300 lshift_block (buf0
, buf1
, buf0
, buf1
);
1302 switch (in_len1
/ 4)
1304 case 0: buf0
[0] |= tmp
; break;
1305 case 1: buf0
[1] |= tmp
; break;
1306 case 2: buf0
[2] |= tmp
; break;
1307 case 3: buf0
[3] |= tmp
; break;
1308 case 4: buf1
[0] |= tmp
; break;
1309 case 5: buf1
[1] |= tmp
; break;
1310 case 6: buf1
[2] |= tmp
; break;
1311 case 7: buf1
[3] |= tmp
; break;
/*
 * rule_op_mangle_rotate_right: move the last character to the front.
 *
 * Returns in_len unchanged for an empty word.  Otherwise:
 *  - in_len1 is the index of the last character and sh its bit offset
 *    inside its word;
 *  - the switch extracts that character into tmp;
 *  - rshift_block moves the block up by one byte in place;
 *  - truncate_right is called with in_len on the block.
 * p0 and p1 are not used by the visible statements.
 * NOTE(review): the declaration of tmp, the statement that puts tmp into
 * buf0[0] and the final return are not visible in this listing.
 */
1317 static u32
rule_op_mangle_rotate_right (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1319 if (in_len
== 0) return (in_len
);
1321 const u32 in_len1
= in_len
- 1;
1323 const u32 sh
= (in_len1
& 3) * 8;
1327 switch (in_len1
/ 4)
1329 case 0: tmp
= (buf0
[0] >> sh
) & 0xff; break;
1330 case 1: tmp
= (buf0
[1] >> sh
) & 0xff; break;
1331 case 2: tmp
= (buf0
[2] >> sh
) & 0xff; break;
1332 case 3: tmp
= (buf0
[3] >> sh
) & 0xff; break;
1333 case 4: tmp
= (buf1
[0] >> sh
) & 0xff; break;
1334 case 5: tmp
= (buf1
[1] >> sh
) & 0xff; break;
1335 case 6: tmp
= (buf1
[2] >> sh
) & 0xff; break;
1336 case 7: tmp
= (buf1
[3] >> sh
) & 0xff; break;
1339 rshift_block (buf0
, buf1
, buf0
, buf1
);
1343 truncate_right (buf0
, buf1
, in_len
);
/*
 * rule_op_mangle_delete_first: drop the first character.
 *
 * Returns in_len unchanged for an empty word.  Otherwise the block is moved
 * down by one byte in place with lshift_block; in_len1 is the resulting
 * length.  p0 and p1 are not used by the visible statements.
 * NOTE(review): the final return (presumably of in_len1) is not visible in
 * this listing.
 */
1348 static u32
rule_op_mangle_delete_first (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1350 if (in_len
== 0) return (in_len
);
1352 const u32 in_len1
= in_len
- 1;
1354 lshift_block (buf0
, buf1
, buf0
, buf1
);
/*
 * rule_op_mangle_delete_last: drop the last character.
 *
 * Returns in_len unchanged for an empty word.  Otherwise tmp keeps the bytes
 * below lane (in_len1 & 3), and the switch ANDs it into the word that holds
 * index in_len1, which clears the last character (and any bytes above it in
 * that word).  p0 and p1 are not used by the visible statements.
 * NOTE(review): the final return (presumably of in_len1) is not visible in
 * this listing.
 */
1359 static u32
rule_op_mangle_delete_last (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1361 if (in_len
== 0) return (in_len
);
1363 const u32 in_len1
= in_len
- 1;
1365 const u32 tmp
= (1 << ((in_len1
& 3) * 8)) - 1;
1367 switch (in_len1
/ 4)
1369 case 0: buf0
[0] &= tmp
; break;
1370 case 1: buf0
[1] &= tmp
; break;
1371 case 2: buf0
[2] &= tmp
; break;
1372 case 3: buf0
[3] &= tmp
; break;
1373 case 4: buf1
[0] &= tmp
; break;
1374 case 5: buf1
[1] &= tmp
; break;
1375 case 6: buf1
[2] &= tmp
; break;
1376 case 7: buf1
[3] &= tmp
; break;
/*
 * rule_op_mangle_delete_at: remove the character at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len.  Otherwise lshift_block writes
 * a copy of the block moved down by one byte into tib40 / tib41, and ml
 * keeps the bytes below lane (p0 & 3).  Each case starts by keeping the
 * bytes of the affected word that precede position p0.
 * NOTE(review): the declaration of tib40 / tib41, the switch header
 * (presumably switch (p0 / 4)), the continuation of every case (presumably
 * merging the shifted copy from tib40 / tib41), the update of out_len and
 * the final return are not visible in this listing.  p1 is not used by the
 * visible statements.
 */
1382 static u32
rule_op_mangle_delete_at (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1384 if (p0
>= in_len
) return (in_len
);
1386 u32 out_len
= in_len
;
1391 lshift_block (buf0
, buf1
, tib40
, tib41
);
1393 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
1398 case 0: buf0
[0] = (buf0
[0] & ml
)
1408 case 1: buf0
[1] = (buf0
[1] & ml
)
1417 case 2: buf0
[2] = (buf0
[2] & ml
)
1425 case 3: buf0
[3] = (buf0
[3] & ml
)
1432 case 4: buf1
[0] = (buf1
[0] & ml
)
1438 case 5: buf1
[1] = (buf1
[1] & ml
)
1443 case 6: buf1
[2] = (buf1
[2] & ml
)
1447 case 7: buf1
[3] = (buf1
[3] & ml
)
/*
 * rule_op_mangle_extract: keep a substring that starts at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len or when p0 + p1 exceeds in_len.
 * Otherwise lshift_block_N moves the block down by p0 bytes in place and
 * truncate_right is called with out_len.
 * NOTE(review): the declaration of out_len (presumably p1) and the final
 * return are not visible in this listing.
 */
1457 static u32
rule_op_mangle_extract (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1459 if (p0
>= in_len
) return (in_len
);
1461 if ((p0
+ p1
) > in_len
) return (in_len
);
1465 lshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
1467 truncate_right (buf0
, buf1
, out_len
);
/*
 * rule_op_mangle_omit: remove p1 characters starting at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len or when p0 + p1 exceeds in_len.
 * Otherwise lshift_block_N writes a copy of the block moved down by p1 bytes
 * into tib40 / tib41, and ml keeps the bytes below lane (p0 & 3).  Each case
 * starts by keeping the bytes of the affected word that precede position p0.
 * NOTE(review): the declaration of tib40 / tib41, the switch header
 * (presumably switch (p0 / 4)), the continuation of every case (presumably
 * merging the shifted copy from tib40 / tib41), the update of out_len and
 * the final return are not visible in this listing.
 */
1472 static u32
rule_op_mangle_omit (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1474 if (p0
>= in_len
) return (in_len
);
1476 if ((p0
+ p1
) > in_len
) return (in_len
);
1478 u32 out_len
= in_len
;
1492 lshift_block_N (buf0
, buf1
, tib40
, tib41
, p1
);
1494 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
1499 case 0: buf0
[0] = (buf0
[0] & ml
)
1509 case 1: buf0
[1] = (buf0
[1] & ml
)
1518 case 2: buf0
[2] = (buf0
[2] & ml
)
1526 case 3: buf0
[3] = (buf0
[3] & ml
)
1533 case 4: buf1
[0] = (buf1
[0] & ml
)
1539 case 5: buf1
[1] = (buf1
[1] & ml
)
1544 case 6: buf1
[2] = (buf1
[2] & ml
)
1548 case 7: buf1
[3] = (buf1
[3] & ml
)
/*
 * rule_op_mangle_insert: insert the character p1 at position p0.
 *
 * Returns in_len unchanged when p0 > in_len or when one more byte would
 * reach 32 bytes.  Otherwise:
 *  - rshift_block writes a copy of the block moved up by one byte into
 *    tib40 / tib41;
 *  - p1n is p1 moved into byte lane (p0 & 3);
 *  - ml keeps the bytes below that lane, mr the bytes above it.
 * Each case rebuilds the affected word from the original bytes before p0,
 * the new character, and the shifted copy after p0.
 * NOTE(review): the declaration of tib40 / tib41, the switch header
 * (presumably switch (p0 / 4)), the statements of each case that copy the
 * following words from tib40 / tib41, the update of out_len and the final
 * return are not visible in this listing.
 */
1558 static u32
rule_op_mangle_insert (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1560 if (p0
> in_len
) return (in_len
);
1562 if ((in_len
+ 1) >= 32) return (in_len
);
1564 u32 out_len
= in_len
;
1569 rshift_block (buf0
, buf1
, tib40
, tib41
);
1571 const u32 p1n
= p1
<< ((p0
& 3) * 8);
1573 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
1575 const u32 mr
= 0xffffff00 << ((p0
& 3) * 8);
1579 case 0: buf0
[0] = (buf0
[0] & ml
) | p1n
| (tib40
[0] & mr
);
1588 case 1: buf0
[1] = (buf0
[1] & ml
) | p1n
| (tib40
[1] & mr
);
1596 case 2: buf0
[2] = (buf0
[2] & ml
) | p1n
| (tib40
[2] & mr
);
1603 case 3: buf0
[3] = (buf0
[3] & ml
) | p1n
| (tib40
[3] & mr
);
1609 case 4: buf1
[0] = (buf1
[0] & ml
) | p1n
| (tib41
[0] & mr
);
1614 case 5: buf1
[1] = (buf1
[1] & ml
) | p1n
| (tib41
[1] & mr
);
1618 case 6: buf1
[2] = (buf1
[2] & ml
) | p1n
| (tib41
[2] & mr
);
1621 case 7: buf1
[3] = (buf1
[3] & ml
) | p1n
| (tib41
[3] & mr
);
/*
 * rule_op_mangle_overstrike: overwrite the character at position p0 with p1.
 *
 * Returns in_len unchanged when p0 >= in_len.  Otherwise p1n is p1 moved
 * into byte lane (p0 & 3) and m clears that lane; each case replaces the
 * byte in one word of the block.
 * NOTE(review): the switch header (presumably switch (p0 / 4)) and the final
 * return statement are not visible in this listing.
 */
1630 static u32
rule_op_mangle_overstrike (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1632 if (p0
>= in_len
) return (in_len
);
1634 const u32 p1n
= p1
<< ((p0
& 3) * 8);
1636 const u32 m
= ~(0xff << ((p0
& 3) * 8));
1640 case 0: buf0
[0] = (buf0
[0] & m
) | p1n
; break;
1641 case 1: buf0
[1] = (buf0
[1] & m
) | p1n
; break;
1642 case 2: buf0
[2] = (buf0
[2] & m
) | p1n
; break;
1643 case 3: buf0
[3] = (buf0
[3] & m
) | p1n
; break;
1644 case 4: buf1
[0] = (buf1
[0] & m
) | p1n
; break;
1645 case 5: buf1
[1] = (buf1
[1] & m
) | p1n
; break;
1646 case 6: buf1
[2] = (buf1
[2] & m
) | p1n
; break;
1647 case 7: buf1
[3] = (buf1
[3] & m
) | p1n
; break;
/*
 * rule_op_mangle_truncate_at: cut the word at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len.  Otherwise truncate_right is
 * called on the block with p0 as the length.  p1 is not used by the visible
 * statements.
 * NOTE(review): the final return (presumably of p0) is not visible in this
 * listing.
 */
1653 static u32
rule_op_mangle_truncate_at (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1655 if (p0
>= in_len
) return (in_len
);
1657 truncate_right (buf0
, buf1
, p0
);
/*
 * rule_op_mangle_replace: replace every character equal to p0 with p1.
 *
 * Three variants of the same loop-free body are listed.  Each one broadcasts
 * p0 and p1 into byte vectors (tmp0 / tmp1), reinterprets one word of the
 * block as a byte vector, uses select () to take the byte from tmp1 wherever
 * the byte equals tmp0, and stores the result back:
 *  - uchar4  with as_uint
 *  - uchar8  with as_uint2
 *  - uchar16 with as_uint4
 * in_len is not used by the visible statements.
 * NOTE(review): the preprocessor lines that pick one variant (presumably by
 * the vector width of u32x), the declaration of tmp and the final return are
 * not visible in this listing.
 */
1662 static u32
rule_op_mangle_replace (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1666 const uchar4 tmp0
= (uchar4
) (p0
);
1667 const uchar4 tmp1
= (uchar4
) (p1
);
1671 tmp
= as_uchar4 (buf0
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[0] = as_uint (tmp
);
1672 tmp
= as_uchar4 (buf0
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[1] = as_uint (tmp
);
1673 tmp
= as_uchar4 (buf0
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[2] = as_uint (tmp
);
1674 tmp
= as_uchar4 (buf0
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[3] = as_uint (tmp
);
1675 tmp
= as_uchar4 (buf1
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[0] = as_uint (tmp
);
1676 tmp
= as_uchar4 (buf1
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[1] = as_uint (tmp
);
1677 tmp
= as_uchar4 (buf1
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[2] = as_uint (tmp
);
1678 tmp
= as_uchar4 (buf1
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[3] = as_uint (tmp
);
1684 const uchar8 tmp0
= (uchar8
) (p0
);
1685 const uchar8 tmp1
= (uchar8
) (p1
);
1689 tmp
= as_uchar8 (buf0
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[0] = as_uint2 (tmp
);
1690 tmp
= as_uchar8 (buf0
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[1] = as_uint2 (tmp
);
1691 tmp
= as_uchar8 (buf0
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[2] = as_uint2 (tmp
);
1692 tmp
= as_uchar8 (buf0
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[3] = as_uint2 (tmp
);
1693 tmp
= as_uchar8 (buf1
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[0] = as_uint2 (tmp
);
1694 tmp
= as_uchar8 (buf1
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[1] = as_uint2 (tmp
);
1695 tmp
= as_uchar8 (buf1
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[2] = as_uint2 (tmp
);
1696 tmp
= as_uchar8 (buf1
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[3] = as_uint2 (tmp
);
1702 const uchar16 tmp0
= (uchar16
) (p0
);
1703 const uchar16 tmp1
= (uchar16
) (p1
);
1707 tmp
= as_uchar16 (buf0
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[0] = as_uint4 (tmp
);
1708 tmp
= as_uchar16 (buf0
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[1] = as_uint4 (tmp
);
1709 tmp
= as_uchar16 (buf0
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[2] = as_uint4 (tmp
);
1710 tmp
= as_uchar16 (buf0
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[3] = as_uint4 (tmp
);
1711 tmp
= as_uchar16 (buf1
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[0] = as_uint4 (tmp
);
1712 tmp
= as_uchar16 (buf1
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[1] = as_uint4 (tmp
);
1713 tmp
= as_uchar16 (buf1
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[2] = as_uint4 (tmp
);
1714 tmp
= as_uchar16 (buf1
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[3] = as_uint4 (tmp
);
/*
 * rule_op_mangle_purgechar: shares the (p0, p1, buf0, buf1, in_len) signature
 * of the other rule_op_mangle_* functions.
 *
 * NOTE(review): only the signature is visible in this listing; the body (and
 * therefore the actual behavior and return value) must be checked in the
 * full file.
 */
1721 static u32
rule_op_mangle_purgechar (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
/*
 * rule_op_mangle_togglecase_rec: shares the (p0, p1, buf0, buf1, in_len)
 * signature of the other rule_op_mangle_* functions.
 *
 * NOTE(review): only the signature is visible in this listing; the body (and
 * therefore the actual behavior and return value) must be checked in the
 * full file.
 */
1727 static u32
rule_op_mangle_togglecase_rec (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
/*
 * rule_op_mangle_dupechar_first: prepend copies of the lowest byte of buf0[0]
 * to the word held in buf0/buf1.
 *
 * Returns in_len unchanged when the word is empty or when in_len + p0 would
 * reach 32.  Otherwise the low byte of buf0[0] is saved in tmp, the buffers
 * are passed in place to rshift_block_N () with p0, and the matching case
 * ORs tmp into the N lowest byte positions of the buffer.
 *
 * NOTE(review): the switch header, the break statements and the final
 * return are not visible in this listing - presumably the switch is on p0
 * and the new length is in_len + p0; confirm against the full file.
 */
1733 static u32
rule_op_mangle_dupechar_first (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1735 if ( in_len
== 0) return (in_len
);
1736 if ((in_len
+ p0
) >= 32) return (in_len
);
1738 u32 out_len
= in_len
;
1740 const u32x tmp
= buf0
[0] & 0xFF;
1742 rshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
// case N: fill byte positions 0 .. N-1 (four per 32-bit word) with tmp
1746 case 1: buf0
[0] |= tmp
<< 0;
1748 case 2: buf0
[0] |= tmp
<< 0 | tmp
<< 8;
1750 case 3: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
1752 case 4: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1754 case 5: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1755 buf0
[1] |= tmp
<< 0;
1757 case 6: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1758 buf0
[1] |= tmp
<< 0 | tmp
<< 8;
1760 case 7: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1761 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
1763 case 8: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1764 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1766 case 9: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1767 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1768 buf0
[2] |= tmp
<< 0;
1770 case 10: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1771 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1772 buf0
[2] |= tmp
<< 0 | tmp
<< 8;
1774 case 11: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1775 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1776 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
1778 case 12: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1779 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1780 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1782 case 13: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1783 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1784 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1785 buf0
[3] |= tmp
<< 0;
1787 case 14: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1788 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1789 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1790 buf0
[3] |= tmp
<< 0 | tmp
<< 8;
1792 case 15: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1793 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1794 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1795 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
1797 case 16: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1798 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1799 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1800 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1802 case 17: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1803 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1804 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1805 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1806 buf1
[0] |= tmp
<< 0;
1808 case 18: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1809 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1810 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1811 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1812 buf1
[0] |= tmp
<< 0 | tmp
<< 8;
1814 case 19: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1815 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1816 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1817 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1818 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
1820 case 20: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1821 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1822 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1823 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1824 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1826 case 21: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1827 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1828 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1829 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1830 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1831 buf1
[1] |= tmp
<< 0;
1833 case 22: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1834 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1835 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1836 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1837 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1838 buf1
[1] |= tmp
<< 0 | tmp
<< 8;
1840 case 23: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1841 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1842 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1843 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1844 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1845 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
1847 case 24: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1848 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1849 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1850 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1851 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1852 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1854 case 25: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1855 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1856 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1857 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1858 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1859 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1860 buf1
[2] |= tmp
<< 0;
1862 case 26: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1863 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1864 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1865 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1866 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1867 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1868 buf1
[2] |= tmp
<< 0 | tmp
<< 8;
1870 case 27: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1871 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1872 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1873 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1874 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1875 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1876 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
1878 case 28: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1879 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1880 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1881 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1882 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1883 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1884 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1886 case 29: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1887 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1888 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1889 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1890 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1891 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1892 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1893 buf1
[3] |= tmp
<< 0;
1895 case 30: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1896 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1897 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1898 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1899 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1900 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1901 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1902 buf1
[3] |= tmp
<< 0 | tmp
<< 8;
1904 case 31: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1905 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1906 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1907 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1908 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1909 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1910 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
1911 buf1
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
/*
 * rule_op_mangle_dupechar_last: append copies of the byte at index
 * in_len - 1 to the word held in buf0/buf1.
 *
 * Returns in_len unchanged when the word is empty or when in_len + p0 would
 * reach 32.  Otherwise the byte at index in_len - 1 is extracted into tmp
 * (word chosen by in_len1 / 4, bit offset by (in_len1 & 3) * 8) and
 * append_block1 () is called inside a loop that runs p0 times.
 *
 * NOTE(review): the declaration of tmp, the rest of the loop body and the
 * final return are not visible in this listing - presumably out_len is
 * advanced once per iteration and returned; confirm against the full file.
 */
1920 static u32
rule_op_mangle_dupechar_last (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1922 if ( in_len
== 0) return (in_len
);
1923 if ((in_len
+ p0
) >= 32) return (in_len
);
// index of the last byte and its bit offset inside its 32-bit word
1925 const u32 in_len1
= in_len
- 1;
1927 const u32 sh
= (in_len1
& 3) * 8;
1931 switch (in_len1
/ 4)
1933 case 0: tmp
= (buf0
[0] >> sh
) & 0xff; break;
1934 case 1: tmp
= (buf0
[1] >> sh
) & 0xff; break;
1935 case 2: tmp
= (buf0
[2] >> sh
) & 0xff; break;
1936 case 3: tmp
= (buf0
[3] >> sh
) & 0xff; break;
1937 case 4: tmp
= (buf1
[0] >> sh
) & 0xff; break;
1938 case 5: tmp
= (buf1
[1] >> sh
) & 0xff; break;
1939 case 6: tmp
= (buf1
[2] >> sh
) & 0xff; break;
1940 case 7: tmp
= (buf1
[3] >> sh
) & 0xff; break;
1943 u32 out_len
= in_len
;
1945 for (u32 i
= 0; i
< p0
; i
++)
1947 append_block1 (out_len
, buf0
, buf1
, tmp
);
/*
 * rule_op_mangle_dupechar_all: double every byte of the word ("abc" style
 * input becomes "aabbcc" style output).
 *
 * Returns in_len unchanged when the word is empty or when 2 * in_len would
 * reach 32.  Only buf0[0..3] is read as input; the guard keeps in_len below
 * 16, so the input fits in those four words.
 *
 * Step 1 spreads each pair of source bytes into the even byte lanes of one
 * temporary word; step 2 ORs every temporary with itself shifted left by 8
 * so that each byte also fills the odd lane next to it.  out_len is doubled.
 *
 * NOTE(review): the declarations of tib40/tib41 and the final return are not
 * visible in this listing.
 */
1955 static u32
rule_op_mangle_dupechar_all (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1957 if ( in_len
== 0) return (in_len
);
1958 if ((in_len
+ in_len
) >= 32) return (in_len
);
1960 u32 out_len
= in_len
;
// step 1: two source bytes per temporary, placed in bits 0-7 and 16-23
1965 tib40
[0] = ((buf0
[0] & 0x000000FF) << 0) | ((buf0
[0] & 0x0000FF00) << 8);
1966 tib40
[1] = ((buf0
[0] & 0x00FF0000) >> 16) | ((buf0
[0] & 0xFF000000) >> 8);
1967 tib40
[2] = ((buf0
[1] & 0x000000FF) << 0) | ((buf0
[1] & 0x0000FF00) << 8);
1968 tib40
[3] = ((buf0
[1] & 0x00FF0000) >> 16) | ((buf0
[1] & 0xFF000000) >> 8);
1969 tib41
[0] = ((buf0
[2] & 0x000000FF) << 0) | ((buf0
[2] & 0x0000FF00) << 8);
1970 tib41
[1] = ((buf0
[2] & 0x00FF0000) >> 16) | ((buf0
[2] & 0xFF000000) >> 8);
1971 tib41
[2] = ((buf0
[3] & 0x000000FF) << 0) | ((buf0
[3] & 0x0000FF00) << 8);
1972 tib41
[3] = ((buf0
[3] & 0x00FF0000) >> 16) | ((buf0
[3] & 0xFF000000) >> 8);
// step 2: copy each byte into the neighbouring odd lane
1974 buf0
[0] = tib40
[0] | (tib40
[0] << 8);
1975 buf0
[1] = tib40
[1] | (tib40
[1] << 8);
1976 buf0
[2] = tib40
[2] | (tib40
[2] << 8);
1977 buf0
[3] = tib40
[3] | (tib40
[3] << 8);
1978 buf1
[0] = tib41
[0] | (tib41
[0] << 8);
1979 buf1
[1] = tib41
[1] | (tib41
[1] << 8);
1980 buf1
[2] = tib41
[2] | (tib41
[2] << 8);
1981 buf1
[3] = tib41
[3] | (tib41
[3] << 8);
1983 out_len
= out_len
+ out_len
;
/*
 * rule_op_mangle_switch_first: swap the two lowest bytes of buf0[0].
 *
 * Words shorter than two bytes are returned unchanged (in_len is returned).
 * The upper 16 bits of buf0[0] are preserved; p0 and p1 are not referenced
 * in the visible lines.
 *
 * NOTE(review): the final return is not visible in this listing.
 */
1988 static u32
rule_op_mangle_switch_first (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1990 if (in_len
< 2) return (in_len
);
1992 buf0
[0] = (buf0
[0] & 0xFFFF0000) | ((buf0
[0] << 8) & 0x0000FF00) | ((buf0
[0] >> 8) & 0x000000FF);
/*
 * rule_op_mangle_switch_last: swap two adjacent bytes near the end of the
 * word held in buf0/buf1.
 *
 * Words shorter than two bytes are returned unchanged (in_len is returned).
 * Case N exchanges the bytes at indices N-2 and N-1.  Three shapes occur:
 *   - both bytes in one word (N % 4 == 2, 3 or 0): shift/mask inside it;
 *     the N % 4 == 2 form writes zero to the two upper bytes of that word;
 *   - the pair straddles two words (N % 4 == 1): the top byte of the lower
 *     word is parked in the upper word, the lower word receives the upper
 *     word's low byte, then the upper word is shifted down by 24.
 *
 * NOTE(review): the switch header, the break statements and the final
 * return are not visible in this listing - presumably the switch is on
 * in_len; the straddling cases also appear to assume that bytes beyond the
 * word length are zero.  Confirm against the full file.
 */
1997 static u32
rule_op_mangle_switch_last (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
1999 if (in_len
< 2) return (in_len
);
2003 case 2: buf0
[0] = ((buf0
[0] << 8) & 0x0000FF00) | ((buf0
[0] >> 8) & 0x000000FF);
2005 case 3: buf0
[0] = (buf0
[0] & 0x000000FF) | ((buf0
[0] << 8) & 0x00FF0000) | ((buf0
[0] >> 8) & 0x0000FF00);
2007 case 4: buf0
[0] = (buf0
[0] & 0x0000FFFF) | ((buf0
[0] << 8) & 0xFF000000) | ((buf0
[0] >> 8) & 0x00FF0000);
2009 case 5: buf0
[1] = (buf0
[0] & 0xFF000000) | buf0
[1];
2010 buf0
[0] = (buf0
[0] & 0x00FFFFFF) | (buf0
[1] << 24);
2011 buf0
[1] = (buf0
[1] >> 24);
2013 case 6: buf0
[1] = ((buf0
[1] << 8) & 0x0000FF00) | ((buf0
[1] >> 8) & 0x000000FF);
2015 case 7: buf0
[1] = (buf0
[1] & 0x000000FF) | ((buf0
[1] << 8) & 0x00FF0000) | ((buf0
[1] >> 8) & 0x0000FF00);
2017 case 8: buf0
[1] = (buf0
[1] & 0x0000FFFF) | ((buf0
[1] << 8) & 0xFF000000) | ((buf0
[1] >> 8) & 0x00FF0000);
2019 case 9: buf0
[2] = (buf0
[1] & 0xFF000000) | buf0
[2];
2020 buf0
[1] = (buf0
[1] & 0x00FFFFFF) | (buf0
[2] << 24);
2021 buf0
[2] = (buf0
[2] >> 24);
2023 case 10: buf0
[2] = ((buf0
[2] << 8) & 0x0000FF00) | ((buf0
[2] >> 8) & 0x000000FF);
2025 case 11: buf0
[2] = (buf0
[2] & 0x000000FF) | ((buf0
[2] << 8) & 0x00FF0000) | ((buf0
[2] >> 8) & 0x0000FF00);
2027 case 12: buf0
[2] = (buf0
[2] & 0x0000FFFF) | ((buf0
[2] << 8) & 0xFF000000) | ((buf0
[2] >> 8) & 0x00FF0000);
2029 case 13: buf0
[3] = (buf0
[2] & 0xFF000000) | buf0
[3];
2030 buf0
[2] = (buf0
[2] & 0x00FFFFFF) | (buf0
[3] << 24);
2031 buf0
[3] = (buf0
[3] >> 24);
2033 case 14: buf0
[3] = ((buf0
[3] << 8) & 0x0000FF00) | ((buf0
[3] >> 8) & 0x000000FF);
2035 case 15: buf0
[3] = (buf0
[3] & 0x000000FF) | ((buf0
[3] << 8) & 0x00FF0000) | ((buf0
[3] >> 8) & 0x0000FF00);
2037 case 16: buf0
[3] = (buf0
[3] & 0x0000FFFF) | ((buf0
[3] << 8) & 0xFF000000) | ((buf0
[3] >> 8) & 0x00FF0000);
2039 case 17: buf1
[0] = (buf0
[3] & 0xFF000000) | buf1
[0];
2040 buf0
[3] = (buf0
[3] & 0x00FFFFFF) | (buf1
[0] << 24);
2041 buf1
[0] = (buf1
[0] >> 24);
2043 case 18: buf1
[0] = ((buf1
[0] << 8) & 0x0000FF00) | ((buf1
[0] >> 8) & 0x000000FF);
2045 case 19: buf1
[0] = (buf1
[0] & 0x000000FF) | ((buf1
[0] << 8) & 0x00FF0000) | ((buf1
[0] >> 8) & 0x0000FF00);
2047 case 20: buf1
[0] = (buf1
[0] & 0x0000FFFF) | ((buf1
[0] << 8) & 0xFF000000) | ((buf1
[0] >> 8) & 0x00FF0000);
2049 case 21: buf1
[1] = (buf1
[0] & 0xFF000000) | buf1
[1];
2050 buf1
[0] = (buf1
[0] & 0x00FFFFFF) | (buf1
[1] << 24);
2051 buf1
[1] = (buf1
[1] >> 24);
2053 case 22: buf1
[1] = ((buf1
[1] << 8) & 0x0000FF00) | ((buf1
[1] >> 8) & 0x000000FF);
2055 case 23: buf1
[1] = (buf1
[1] & 0x000000FF) | ((buf1
[1] << 8) & 0x00FF0000) | ((buf1
[1] >> 8) & 0x0000FF00);
2057 case 24: buf1
[1] = (buf1
[1] & 0x0000FFFF) | ((buf1
[1] << 8) & 0xFF000000) | ((buf1
[1] >> 8) & 0x00FF0000);
2059 case 25: buf1
[2] = (buf1
[1] & 0xFF000000) | buf1
[2];
2060 buf1
[1] = (buf1
[1] & 0x00FFFFFF) | (buf1
[2] << 24);
2061 buf1
[2] = (buf1
[2] >> 24);
2063 case 26: buf1
[2] = ((buf1
[2] << 8) & 0x0000FF00) | ((buf1
[2] >> 8) & 0x000000FF);
2065 case 27: buf1
[2] = (buf1
[2] & 0x000000FF) | ((buf1
[2] << 8) & 0x00FF0000) | ((buf1
[2] >> 8) & 0x0000FF00);
2067 case 28: buf1
[2] = (buf1
[2] & 0x0000FFFF) | ((buf1
[2] << 8) & 0xFF000000) | ((buf1
[2] >> 8) & 0x00FF0000);
2069 case 29: buf1
[3] = (buf1
[2] & 0xFF000000) | buf1
[3];
2070 buf1
[2] = (buf1
[2] & 0x00FFFFFF) | (buf1
[3] << 24);
2071 buf1
[3] = (buf1
[3] >> 24);
2073 case 30: buf1
[3] = ((buf1
[3] << 8) & 0x0000FF00) | ((buf1
[3] >> 8) & 0x000000FF);
2075 case 31: buf1
[3] = (buf1
[3] & 0x000000FF) | ((buf1
[3] << 8) & 0x00FF0000) | ((buf1
[3] >> 8) & 0x0000FF00);
/*
 * rule_op_mangle_switch_at: exchange two bytes of the word held in
 * buf0/buf1.
 *
 * Returns in_len unchanged when either p0 or p1 is not below in_len.
 * Three case tables over byte index 0..31 are visible:
 *   1. read the byte at the selected index into tmp0;
 *   2. read the byte at the selected index into tmp1, then overwrite that
 *      position with tmp0;
 *   3. overwrite the byte at the selected index with tmp1.
 *
 * NOTE(review): the switch headers, the declarations of tmp0/tmp1, the
 * break statements and the final return are not visible in this listing -
 * presumably the tables are selected by p0, p1 and p0 respectively; confirm
 * against the full file.
 */
2082 static u32
rule_op_mangle_switch_at (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2084 if (p0
>= in_len
) return (in_len
);
2085 if (p1
>= in_len
) return (in_len
);
// table 1: tmp0 = byte at index N
2092 case 0: tmp0
= (buf0
[0] >> 0) & 0xFF;
2094 case 1: tmp0
= (buf0
[0] >> 8) & 0xFF;
2096 case 2: tmp0
= (buf0
[0] >> 16) & 0xFF;
2098 case 3: tmp0
= (buf0
[0] >> 24) & 0xFF;
2100 case 4: tmp0
= (buf0
[1] >> 0) & 0xFF;
2102 case 5: tmp0
= (buf0
[1] >> 8) & 0xFF;
2104 case 6: tmp0
= (buf0
[1] >> 16) & 0xFF;
2106 case 7: tmp0
= (buf0
[1] >> 24) & 0xFF;
2108 case 8: tmp0
= (buf0
[2] >> 0) & 0xFF;
2110 case 9: tmp0
= (buf0
[2] >> 8) & 0xFF;
2112 case 10: tmp0
= (buf0
[2] >> 16) & 0xFF;
2114 case 11: tmp0
= (buf0
[2] >> 24) & 0xFF;
2116 case 12: tmp0
= (buf0
[3] >> 0) & 0xFF;
2118 case 13: tmp0
= (buf0
[3] >> 8) & 0xFF;
2120 case 14: tmp0
= (buf0
[3] >> 16) & 0xFF;
2122 case 15: tmp0
= (buf0
[3] >> 24) & 0xFF;
2124 case 16: tmp0
= (buf1
[0] >> 0) & 0xFF;
2126 case 17: tmp0
= (buf1
[0] >> 8) & 0xFF;
2128 case 18: tmp0
= (buf1
[0] >> 16) & 0xFF;
2130 case 19: tmp0
= (buf1
[0] >> 24) & 0xFF;
2132 case 20: tmp0
= (buf1
[1] >> 0) & 0xFF;
2134 case 21: tmp0
= (buf1
[1] >> 8) & 0xFF;
2136 case 22: tmp0
= (buf1
[1] >> 16) & 0xFF;
2138 case 23: tmp0
= (buf1
[1] >> 24) & 0xFF;
2140 case 24: tmp0
= (buf1
[2] >> 0) & 0xFF;
2142 case 25: tmp0
= (buf1
[2] >> 8) & 0xFF;
2144 case 26: tmp0
= (buf1
[2] >> 16) & 0xFF;
2146 case 27: tmp0
= (buf1
[2] >> 24) & 0xFF;
2148 case 28: tmp0
= (buf1
[3] >> 0) & 0xFF;
2150 case 29: tmp0
= (buf1
[3] >> 8) & 0xFF;
2152 case 30: tmp0
= (buf1
[3] >> 16) & 0xFF;
2154 case 31: tmp0
= (buf1
[3] >> 24) & 0xFF;
// table 2: tmp1 = byte at index N, then store tmp0 at index N
2160 case 0: tmp1
= (buf0
[0] >> 0) & 0xff;
2161 buf0
[0] = (buf0
[0] & 0xffffff00) | tmp0
<< 0;
2163 case 1: tmp1
= (buf0
[0] >> 8) & 0xff;
2164 buf0
[0] = (buf0
[0] & 0xffff00ff) | tmp0
<< 8;
2166 case 2: tmp1
= (buf0
[0] >> 16) & 0xff;
2167 buf0
[0] = (buf0
[0] & 0xff00ffff) | tmp0
<< 16;
2169 case 3: tmp1
= (buf0
[0] >> 24) & 0xff;
2170 buf0
[0] = (buf0
[0] & 0x00ffffff) | tmp0
<< 24;
2172 case 4: tmp1
= (buf0
[1] >> 0) & 0xff;
2173 buf0
[1] = (buf0
[1] & 0xffffff00) | tmp0
<< 0;
2175 case 5: tmp1
= (buf0
[1] >> 8) & 0xff;
2176 buf0
[1] = (buf0
[1] & 0xffff00ff) | tmp0
<< 8;
2178 case 6: tmp1
= (buf0
[1] >> 16) & 0xff;
2179 buf0
[1] = (buf0
[1] & 0xff00ffff) | tmp0
<< 16;
2181 case 7: tmp1
= (buf0
[1] >> 24) & 0xff;
2182 buf0
[1] = (buf0
[1] & 0x00ffffff) | tmp0
<< 24;
2184 case 8: tmp1
= (buf0
[2] >> 0) & 0xff;
2185 buf0
[2] = (buf0
[2] & 0xffffff00) | tmp0
<< 0;
2187 case 9: tmp1
= (buf0
[2] >> 8) & 0xff;
2188 buf0
[2] = (buf0
[2] & 0xffff00ff) | tmp0
<< 8;
2190 case 10: tmp1
= (buf0
[2] >> 16) & 0xff;
2191 buf0
[2] = (buf0
[2] & 0xff00ffff) | tmp0
<< 16;
2193 case 11: tmp1
= (buf0
[2] >> 24) & 0xff;
2194 buf0
[2] = (buf0
[2] & 0x00ffffff) | tmp0
<< 24;
2196 case 12: tmp1
= (buf0
[3] >> 0) & 0xff;
2197 buf0
[3] = (buf0
[3] & 0xffffff00) | tmp0
<< 0;
2199 case 13: tmp1
= (buf0
[3] >> 8) & 0xff;
2200 buf0
[3] = (buf0
[3] & 0xffff00ff) | tmp0
<< 8;
2202 case 14: tmp1
= (buf0
[3] >> 16) & 0xff;
2203 buf0
[3] = (buf0
[3] & 0xff00ffff) | tmp0
<< 16;
2205 case 15: tmp1
= (buf0
[3] >> 24) & 0xff;
2206 buf0
[3] = (buf0
[3] & 0x00ffffff) | tmp0
<< 24;
2208 case 16: tmp1
= (buf1
[0] >> 0) & 0xff;
2209 buf1
[0] = (buf1
[0] & 0xffffff00) | tmp0
<< 0;
2211 case 17: tmp1
= (buf1
[0] >> 8) & 0xff;
2212 buf1
[0] = (buf1
[0] & 0xffff00ff) | tmp0
<< 8;
2214 case 18: tmp1
= (buf1
[0] >> 16) & 0xff;
2215 buf1
[0] = (buf1
[0] & 0xff00ffff) | tmp0
<< 16;
2217 case 19: tmp1
= (buf1
[0] >> 24) & 0xff;
2218 buf1
[0] = (buf1
[0] & 0x00ffffff) | tmp0
<< 24;
2220 case 20: tmp1
= (buf1
[1] >> 0) & 0xff;
2221 buf1
[1] = (buf1
[1] & 0xffffff00) | tmp0
<< 0;
2223 case 21: tmp1
= (buf1
[1] >> 8) & 0xff;
2224 buf1
[1] = (buf1
[1] & 0xffff00ff) | tmp0
<< 8;
2226 case 22: tmp1
= (buf1
[1] >> 16) & 0xff;
2227 buf1
[1] = (buf1
[1] & 0xff00ffff) | tmp0
<< 16;
2229 case 23: tmp1
= (buf1
[1] >> 24) & 0xff;
2230 buf1
[1] = (buf1
[1] & 0x00ffffff) | tmp0
<< 24;
2232 case 24: tmp1
= (buf1
[2] >> 0) & 0xff;
2233 buf1
[2] = (buf1
[2] & 0xffffff00) | tmp0
<< 0;
2235 case 25: tmp1
= (buf1
[2] >> 8) & 0xff;
2236 buf1
[2] = (buf1
[2] & 0xffff00ff) | tmp0
<< 8;
2238 case 26: tmp1
= (buf1
[2] >> 16) & 0xff;
2239 buf1
[2] = (buf1
[2] & 0xff00ffff) | tmp0
<< 16;
2241 case 27: tmp1
= (buf1
[2] >> 24) & 0xff;
2242 buf1
[2] = (buf1
[2] & 0x00ffffff) | tmp0
<< 24;
2244 case 28: tmp1
= (buf1
[3] >> 0) & 0xff;
2245 buf1
[3] = (buf1
[3] & 0xffffff00) | tmp0
<< 0;
2247 case 29: tmp1
= (buf1
[3] >> 8) & 0xff;
2248 buf1
[3] = (buf1
[3] & 0xffff00ff) | tmp0
<< 8;
2250 case 30: tmp1
= (buf1
[3] >> 16) & 0xff;
2251 buf1
[3] = (buf1
[3] & 0xff00ffff) | tmp0
<< 16;
2253 case 31: tmp1
= (buf1
[3] >> 24) & 0xff;
2254 buf1
[3] = (buf1
[3] & 0x00ffffff) | tmp0
<< 24;
// table 3: store tmp1 at index N
2260 case 0: buf0
[0] = (buf0
[0] & 0xffffff00) | tmp1
<< 0;
2262 case 1: buf0
[0] = (buf0
[0] & 0xffff00ff) | tmp1
<< 8;
2264 case 2: buf0
[0] = (buf0
[0] & 0xff00ffff) | tmp1
<< 16;
2266 case 3: buf0
[0] = (buf0
[0] & 0x00ffffff) | tmp1
<< 24;
2268 case 4: buf0
[1] = (buf0
[1] & 0xffffff00) | tmp1
<< 0;
2270 case 5: buf0
[1] = (buf0
[1] & 0xffff00ff) | tmp1
<< 8;
2272 case 6: buf0
[1] = (buf0
[1] & 0xff00ffff) | tmp1
<< 16;
2274 case 7: buf0
[1] = (buf0
[1] & 0x00ffffff) | tmp1
<< 24;
2276 case 8: buf0
[2] = (buf0
[2] & 0xffffff00) | tmp1
<< 0;
2278 case 9: buf0
[2] = (buf0
[2] & 0xffff00ff) | tmp1
<< 8;
2280 case 10: buf0
[2] = (buf0
[2] & 0xff00ffff) | tmp1
<< 16;
2282 case 11: buf0
[2] = (buf0
[2] & 0x00ffffff) | tmp1
<< 24;
2284 case 12: buf0
[3] = (buf0
[3] & 0xffffff00) | tmp1
<< 0;
2286 case 13: buf0
[3] = (buf0
[3] & 0xffff00ff) | tmp1
<< 8;
2288 case 14: buf0
[3] = (buf0
[3] & 0xff00ffff) | tmp1
<< 16;
2290 case 15: buf0
[3] = (buf0
[3] & 0x00ffffff) | tmp1
<< 24;
2292 case 16: buf1
[0] = (buf1
[0] & 0xffffff00) | tmp1
<< 0;
2294 case 17: buf1
[0] = (buf1
[0] & 0xffff00ff) | tmp1
<< 8;
2296 case 18: buf1
[0] = (buf1
[0] & 0xff00ffff) | tmp1
<< 16;
2298 case 19: buf1
[0] = (buf1
[0] & 0x00ffffff) | tmp1
<< 24;
2300 case 20: buf1
[1] = (buf1
[1] & 0xffffff00) | tmp1
<< 0;
2302 case 21: buf1
[1] = (buf1
[1] & 0xffff00ff) | tmp1
<< 8;
2304 case 22: buf1
[1] = (buf1
[1] & 0xff00ffff) | tmp1
<< 16;
2306 case 23: buf1
[1] = (buf1
[1] & 0x00ffffff) | tmp1
<< 24;
2308 case 24: buf1
[2] = (buf1
[2] & 0xffffff00) | tmp1
<< 0;
2310 case 25: buf1
[2] = (buf1
[2] & 0xffff00ff) | tmp1
<< 8;
2312 case 26: buf1
[2] = (buf1
[2] & 0xff00ffff) | tmp1
<< 16;
2314 case 27: buf1
[2] = (buf1
[2] & 0x00ffffff) | tmp1
<< 24;
2316 case 28: buf1
[3] = (buf1
[3] & 0xffffff00) | tmp1
<< 0;
2318 case 29: buf1
[3] = (buf1
[3] & 0xffff00ff) | tmp1
<< 8;
2320 case 30: buf1
[3] = (buf1
[3] & 0xff00ffff) | tmp1
<< 16;
2322 case 31: buf1
[3] = (buf1
[3] & 0x00ffffff) | tmp1
<< 24;
/*
 * rule_op_mangle_chr_shiftl: shift one byte of the word left by one bit.
 *
 * Returns in_len unchanged when p0 is not below in_len.  mr masks the byte
 * lane (p0 & 3) inside a 32-bit word; each case keeps the bits selected by
 * ml and replaces the mr lane with its value shifted left by one, re-masked
 * with mr so that the bit shifted out of the byte is dropped.
 *
 * NOTE(review): the declaration of ml, the switch header and the final
 * return are not visible in this listing - presumably ml is the complement
 * of mr and the switch is on p0 / 4; confirm against the full file.
 */
2329 static u32
rule_op_mangle_chr_shiftl (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2331 if (p0
>= in_len
) return (in_len
);
2333 const u32 mr
= 0xff << ((p0
& 3) * 8);
2338 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) << 1) & mr
); break;
2339 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) << 1) & mr
); break;
2340 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) << 1) & mr
); break;
2341 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) << 1) & mr
); break;
2342 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) << 1) & mr
); break;
2343 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) << 1) & mr
); break;
2344 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) << 1) & mr
); break;
2345 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) << 1) & mr
); break;
/*
 * rule_op_mangle_chr_shiftr: shift one byte of the word right by one bit.
 *
 * Returns in_len unchanged when p0 is not below in_len.  mr masks the byte
 * lane (p0 & 3) inside a 32-bit word; each case keeps the bits selected by
 * ml and replaces the mr lane with its value shifted right by one, re-masked
 * with mr so that the bit shifted out of the byte does not leak into the
 * neighbouring lane.
 *
 * NOTE(review): the declaration of ml, the switch header and the final
 * return are not visible in this listing - presumably ml is the complement
 * of mr and the switch is on p0 / 4; confirm against the full file.
 */
2351 static u32
rule_op_mangle_chr_shiftr (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2353 if (p0
>= in_len
) return (in_len
);
2355 const u32 mr
= 0xff << ((p0
& 3) * 8);
2360 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) >> 1) & mr
); break;
2361 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) >> 1) & mr
); break;
2362 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) >> 1) & mr
); break;
2363 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) >> 1) & mr
); break;
2364 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) >> 1) & mr
); break;
2365 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) >> 1) & mr
); break;
2366 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) >> 1) & mr
); break;
2367 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) >> 1) & mr
); break;
/*
 * rule_op_mangle_chr_incr: add one to a single byte of the word.
 *
 * Returns in_len unchanged when p0 is not below in_len.  mr masks the byte
 * lane (p0 & 3) inside a 32-bit word and n holds the value 1 positioned in
 * that same lane.  Each case keeps the bits selected by ml and replaces the
 * mr lane with (lane + n), re-masked with mr so a carry out of the byte is
 * discarded (0xff wraps to 0x00).
 *
 * NOTE(review): the declaration of ml, the switch header and the final
 * return are not visible in this listing - presumably ml is the complement
 * of mr and the switch is on p0 / 4; confirm against the full file.
 */
2373 static u32
rule_op_mangle_chr_incr (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2375 if (p0
>= in_len
) return (in_len
);
2377 const u32 mr
= 0xff << ((p0
& 3) * 8);
2380 const u32 n
= 0x01010101 & mr
;
2384 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) + n
) & mr
); break;
2385 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) + n
) & mr
); break;
2386 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) + n
) & mr
); break;
2387 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) + n
) & mr
); break;
2388 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) + n
) & mr
); break;
2389 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) + n
) & mr
); break;
2390 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) + n
) & mr
); break;
2391 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) + n
) & mr
); break;
/*
 * rule_op_mangle_chr_decr: subtract one from a single byte of the word.
 *
 * Returns in_len unchanged when p0 is not below in_len.  mr masks the byte
 * lane (p0 & 3) inside a 32-bit word and n holds the value 1 positioned in
 * that same lane.  Each case keeps the bits selected by ml and replaces the
 * mr lane with (lane - n), re-masked with mr so a borrow past the byte is
 * discarded (0x00 wraps to 0xff).
 *
 * NOTE(review): the declaration of ml, the switch header and the final
 * return are not visible in this listing - presumably ml is the complement
 * of mr and the switch is on p0 / 4; confirm against the full file.
 */
2397 static u32
rule_op_mangle_chr_decr (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2399 if (p0
>= in_len
) return (in_len
);
2401 const u32 mr
= 0xff << ((p0
& 3) * 8);
2404 const u32 n
= 0x01010101 & mr
;
2408 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) - n
) & mr
); break;
2409 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) - n
) & mr
); break;
2410 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) - n
) & mr
); break;
2411 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) - n
) & mr
); break;
2412 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) - n
) & mr
); break;
2413 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) - n
) & mr
); break;
2414 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) - n
) & mr
); break;
2415 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) - n
) & mr
); break;
/*
 * rule_op_mangle_replace_np1: overwrite the byte at p0 with the byte that
 * follows it (index p0 + 1).
 *
 * Returns in_len unchanged when p0 + 1 is not below in_len.  lshift_block ()
 * writes into tib40/tib41 a copy of the buffer in which every byte position
 * holds the byte from the next higher index; the selected case then copies
 * the mr lane of that copy into the same lane of the original word.
 *
 * NOTE(review): the declarations of tib40/tib41 and ml, the switch header
 * and the final return are not visible in this listing - presumably ml is
 * the complement of mr and the switch is on p0 / 4; confirm against the
 * full file.
 */
2421 static u32
rule_op_mangle_replace_np1 (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2423 if ((p0
+ 1) >= in_len
) return (in_len
);
2428 lshift_block (buf0
, buf1
, tib40
, tib41
);
2430 const u32 mr
= 0xff << ((p0
& 3) * 8);
2435 case 0: buf0
[0] = (buf0
[0] & ml
) | (tib40
[0] & mr
); break;
2436 case 1: buf0
[1] = (buf0
[1] & ml
) | (tib40
[1] & mr
); break;
2437 case 2: buf0
[2] = (buf0
[2] & ml
) | (tib40
[2] & mr
); break;
2438 case 3: buf0
[3] = (buf0
[3] & ml
) | (tib40
[3] & mr
); break;
2439 case 4: buf1
[0] = (buf1
[0] & ml
) | (tib41
[0] & mr
); break;
2440 case 5: buf1
[1] = (buf1
[1] & ml
) | (tib41
[1] & mr
); break;
2441 case 6: buf1
[2] = (buf1
[2] & ml
) | (tib41
[2] & mr
); break;
2442 case 7: buf1
[3] = (buf1
[3] & ml
) | (tib41
[3] & mr
); break;
/*
 * rule_op_mangle_replace_nm1: overwrite the byte at p0 with the byte that
 * precedes it (index p0 - 1).
 *
 * Returns in_len unchanged when p0 is 0 (no predecessor) or when p0 is not
 * below in_len.  rshift_block () writes into tib40/tib41 a copy of the
 * buffer in which every byte position holds the byte from the next lower
 * index; the selected case then copies the mr lane of that copy into the
 * same lane of the original word.
 *
 * NOTE(review): the declarations of tib40/tib41 and ml, the switch header
 * and the final return are not visible in this listing - presumably ml is
 * the complement of mr and the switch is on p0 / 4; confirm against the
 * full file.
 */
2448 static u32
rule_op_mangle_replace_nm1 (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2450 if (p0
== 0) return (in_len
);
2452 if (p0
>= in_len
) return (in_len
);
2457 rshift_block (buf0
, buf1
, tib40
, tib41
);
2459 const u32 mr
= 0xff << ((p0
& 3) * 8);
2464 case 0: buf0
[0] = (buf0
[0] & ml
) | (tib40
[0] & mr
); break;
2465 case 1: buf0
[1] = (buf0
[1] & ml
) | (tib40
[1] & mr
); break;
2466 case 2: buf0
[2] = (buf0
[2] & ml
) | (tib40
[2] & mr
); break;
2467 case 3: buf0
[3] = (buf0
[3] & ml
) | (tib40
[3] & mr
); break;
2468 case 4: buf1
[0] = (buf1
[0] & ml
) | (tib41
[0] & mr
); break;
2469 case 5: buf1
[1] = (buf1
[1] & ml
) | (tib41
[1] & mr
); break;
2470 case 6: buf1
[2] = (buf1
[2] & ml
) | (tib41
[2] & mr
); break;
2471 case 7: buf1
[3] = (buf1
[3] & ml
) | (tib41
[3] & mr
); break;
/*
 * rule_op_mangle_dupeblock_first: combine a p0-byte prefix held in
 * tib40/tib41 with the word shifted by p0.
 *
 * Returns in_len unchanged when p0 exceeds in_len or when in_len + p0 would
 * reach 32.  truncate_right () is applied to tib40/tib41 with p0, the
 * buffers are passed in place to rshift_block_N () with p0, and the
 * temporaries are ORed word-by-word into buf0/buf1.
 *
 * NOTE(review): the declaration and initialisation of tib40/tib41 and the
 * final return are not visible in this listing - presumably the temporaries
 * start as a copy of buf0/buf1 and the result length is in_len + p0; confirm
 * against the full file.
 */
2477 static u32
rule_op_mangle_dupeblock_first (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2479 if (p0
> in_len
) return (in_len
);
2481 if ((in_len
+ p0
) >= 32) return (in_len
);
2483 u32 out_len
= in_len
;
2497 truncate_right (tib40
, tib41
, p0
);
2499 rshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
// merge the temporaries into the shifted word
2501 buf0
[0] |= tib40
[0];
2502 buf0
[1] |= tib40
[1];
2503 buf0
[2] |= tib40
[2];
2504 buf0
[3] |= tib40
[3];
2505 buf1
[0] |= tib41
[0];
2506 buf1
[1] |= tib41
[1];
2507 buf1
[2] |= tib41
[2];
2508 buf1
[3] |= tib41
[3];
/*
 * rule_op_mangle_dupeblock_last: combine the word with a copy of itself that
 * has been shifted by p0 and trimmed with truncate_left ().
 *
 * Returns in_len unchanged when p0 exceeds in_len or when in_len + p0 would
 * reach 32.  rshift_block_N () writes the shifted copy into tib40/tib41,
 * truncate_left () is applied to that copy with out_len, and the result is
 * ORed word-by-word into buf0/buf1.
 *
 * NOTE(review): the declarations of tib40/tib41 and the final return are
 * not visible in this listing - presumably the result length is
 * in_len + p0; confirm against the full file.
 */
2515 static u32
rule_op_mangle_dupeblock_last (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2517 if (p0
> in_len
) return (in_len
);
2519 if ((in_len
+ p0
) >= 32) return (in_len
);
2521 u32 out_len
= in_len
;
2526 rshift_block_N (buf0
, buf1
, tib40
, tib41
, p0
);
2528 truncate_left (tib40
, tib41
, out_len
);
// merge the trimmed, shifted copy into the word
2530 buf0
[0] |= tib40
[0];
2531 buf0
[1] |= tib40
[1];
2532 buf0
[2] |= tib40
[2];
2533 buf0
[3] |= tib40
[3];
2534 buf1
[0] |= tib41
[0];
2535 buf1
[1] |= tib41
[1];
2536 buf1
[2] |= tib41
[2];
2537 buf1
[3] |= tib41
[3];
2544 static u32
rule_op_mangle_title (const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2546 buf0
[0] |= (generate_cmask (buf0
[0]));
2547 buf0
[1] |= (generate_cmask (buf0
[1]));
2548 buf0
[2] |= (generate_cmask (buf0
[2]));
2549 buf0
[3] |= (generate_cmask (buf0
[3]));
2550 buf1
[0] |= (generate_cmask (buf1
[0]));
2551 buf1
[1] |= (generate_cmask (buf1
[1]));
2552 buf1
[2] |= (generate_cmask (buf1
[2]));
2553 buf1
[3] |= (generate_cmask (buf1
[3]));
2560 const uchar4 tmp0
= (uchar4
) (' ');
2561 const uchar4 tmp1
= (uchar4
) (0x00);
2562 const uchar4 tmp2
= (uchar4
) (0xff);
2566 tmp
= as_uchar4 (buf0
[0]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[0] = as_uint (tmp
);
2567 tmp
= as_uchar4 (buf0
[1]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[1] = as_uint (tmp
);
2568 tmp
= as_uchar4 (buf0
[2]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[2] = as_uint (tmp
);
2569 tmp
= as_uchar4 (buf0
[3]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[3] = as_uint (tmp
);
2570 tmp
= as_uchar4 (buf1
[0]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[0] = as_uint (tmp
);
2571 tmp
= as_uchar4 (buf1
[1]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[1] = as_uint (tmp
);
2572 tmp
= as_uchar4 (buf1
[2]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[2] = as_uint (tmp
);
2573 tmp
= as_uchar4 (buf1
[3]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[3] = as_uint (tmp
);
2575 rshift_block (tib40
, tib41
, tib40
, tib41
); tib40
[0] |= 0xff;
2577 buf0
[0] &= ~(generate_cmask (buf0
[0]) & tib40
[0]);
2578 buf0
[1] &= ~(generate_cmask (buf0
[1]) & tib40
[1]);
2579 buf0
[2] &= ~(generate_cmask (buf0
[2]) & tib40
[2]);
2580 buf0
[3] &= ~(generate_cmask (buf0
[3]) & tib40
[3]);
2581 buf1
[0] &= ~(generate_cmask (buf1
[0]) & tib41
[0]);
2582 buf1
[1] &= ~(generate_cmask (buf1
[1]) & tib41
[1]);
2583 buf1
[2] &= ~(generate_cmask (buf1
[2]) & tib41
[2]);
2584 buf1
[3] &= ~(generate_cmask (buf1
[3]) & tib41
[3]);
2588 buf0
[0] &= ~(0x00000020 & generate_cmask (buf0
[0]));
2590 // The VLIW1 code above freezes VLIW4 and VLIW5 systems
2592 for (u32 i
= 0; i
< in_len
; i
++)
2599 case 0: tmp0
= (buf0
[0] >> 0) & 0xFF;
2600 tmp1
= ~(0x00002000 & generate_cmask (buf0
[0])); break;
2601 case 1: tmp0
= (buf0
[0] >> 8) & 0xFF;
2602 tmp1
= ~(0x00200000 & generate_cmask (buf0
[0])); break;
2603 case 2: tmp0
= (buf0
[0] >> 16) & 0xFF;
2604 tmp1
= ~(0x20000000 & generate_cmask (buf0
[0])); break;
2605 case 3: tmp0
= (buf0
[0] >> 24) & 0xFF;
2606 tmp1
= ~(0x00000020 & generate_cmask (buf0
[1])); break;
2607 case 4: tmp0
= (buf0
[1] >> 0) & 0xFF;
2608 tmp1
= ~(0x00002000 & generate_cmask (buf0
[1])); break;
2609 case 5: tmp0
= (buf0
[1] >> 8) & 0xFF;
2610 tmp1
= ~(0x00200000 & generate_cmask (buf0
[1])); break;
2611 case 6: tmp0
= (buf0
[1] >> 16) & 0xFF;
2612 tmp1
= ~(0x20000000 & generate_cmask (buf0
[1])); break;
2613 case 7: tmp0
= (buf0
[1] >> 24) & 0xFF;
2614 tmp1
= ~(0x00000020 & generate_cmask (buf0
[2])); break;
2615 case 8: tmp0
= (buf0
[2] >> 0) & 0xFF;
2616 tmp1
= ~(0x00002000 & generate_cmask (buf0
[2])); break;
2617 case 9: tmp0
= (buf0
[2] >> 8) & 0xFF;
2618 tmp1
= ~(0x00200000 & generate_cmask (buf0
[2])); break;
2619 case 10: tmp0
= (buf0
[2] >> 16) & 0xFF;
2620 tmp1
= ~(0x20000000 & generate_cmask (buf0
[2])); break;
2621 case 11: tmp0
= (buf0
[2] >> 24) & 0xFF;
2622 tmp1
= ~(0x00000020 & generate_cmask (buf0
[3])); break;
2623 case 12: tmp0
= (buf0
[3] >> 0) & 0xFF;
2624 tmp1
= ~(0x00002000 & generate_cmask (buf0
[3])); break;
2625 case 13: tmp0
= (buf0
[3] >> 8) & 0xFF;
2626 tmp1
= ~(0x00200000 & generate_cmask (buf0
[3])); break;
2627 case 14: tmp0
= (buf0
[3] >> 16) & 0xFF;
2628 tmp1
= ~(0x20000000 & generate_cmask (buf0
[3])); break;
2629 case 15: tmp0
= (buf0
[3] >> 24) & 0xFF;
2630 tmp1
= ~(0x00000020 & generate_cmask (buf1
[0])); break;
2631 case 16: tmp0
= (buf1
[0] >> 0) & 0xFF;
2632 tmp1
= ~(0x00002000 & generate_cmask (buf1
[0])); break;
2633 case 17: tmp0
= (buf1
[0] >> 8) & 0xFF;
2634 tmp1
= ~(0x00200000 & generate_cmask (buf1
[0])); break;
2635 case 18: tmp0
= (buf1
[0] >> 16) & 0xFF;
2636 tmp1
= ~(0x20000000 & generate_cmask (buf1
[0])); break;
2637 case 19: tmp0
= (buf1
[0] >> 24) & 0xFF;
2638 tmp1
= ~(0x00000020 & generate_cmask (buf1
[1])); break;
2639 case 20: tmp0
= (buf1
[1] >> 0) & 0xFF;
2640 tmp1
= ~(0x00002000 & generate_cmask (buf1
[1])); break;
2641 case 21: tmp0
= (buf1
[1] >> 8) & 0xFF;
2642 tmp1
= ~(0x00200000 & generate_cmask (buf1
[1])); break;
2643 case 22: tmp0
= (buf1
[1] >> 16) & 0xFF;
2644 tmp1
= ~(0x20000000 & generate_cmask (buf1
[1])); break;
2645 case 23: tmp0
= (buf1
[1] >> 24) & 0xFF;
2646 tmp1
= ~(0x00000020 & generate_cmask (buf1
[2])); break;
2647 case 24: tmp0
= (buf1
[2] >> 0) & 0xFF;
2648 tmp1
= ~(0x00002000 & generate_cmask (buf1
[2])); break;
2649 case 25: tmp0
= (buf1
[2] >> 8) & 0xFF;
2650 tmp1
= ~(0x00200000 & generate_cmask (buf1
[2])); break;
2651 case 26: tmp0
= (buf1
[2] >> 16) & 0xFF;
2652 tmp1
= ~(0x20000000 & generate_cmask (buf1
[2])); break;
2653 case 27: tmp0
= (buf1
[2] >> 24) & 0xFF;
2654 tmp1
= ~(0x00000020 & generate_cmask (buf1
[3])); break;
2655 case 28: tmp0
= (buf1
[3] >> 0) & 0xFF;
2656 tmp1
= ~(0x00002000 & generate_cmask (buf1
[3])); break;
2657 case 29: tmp0
= (buf1
[3] >> 8) & 0xFF;
2658 tmp1
= ~(0x00200000 & generate_cmask (buf1
[3])); break;
2659 case 30: tmp0
= (buf1
[3] >> 16) & 0xFF;
2660 tmp1
= ~(0x20000000 & generate_cmask (buf1
[3])); break;
2666 if (tmp0
.s0
== ' ') buf0
[0].s0
&= tmp1
.s0
;
2667 if (tmp0
.s1
== ' ') buf0
[0].s1
&= tmp1
.s1
;
2671 if (tmp0
.s0
== ' ') buf0
[1].s0
&= tmp1
.s0
;
2672 if (tmp0
.s1
== ' ') buf0
[1].s1
&= tmp1
.s1
;
2676 if (tmp0
.s0
== ' ') buf0
[2].s0
&= tmp1
.s0
;
2677 if (tmp0
.s1
== ' ') buf0
[2].s1
&= tmp1
.s1
;
2681 if (tmp0
.s0
== ' ') buf0
[3].s0
&= tmp1
.s0
;
2682 if (tmp0
.s1
== ' ') buf0
[3].s1
&= tmp1
.s1
;
2686 if (tmp0
.s0
== ' ') buf1
[0].s0
&= tmp1
.s0
;
2687 if (tmp0
.s1
== ' ') buf1
[0].s1
&= tmp1
.s1
;
2691 if (tmp0
.s0
== ' ') buf1
[1].s0
&= tmp1
.s0
;
2692 if (tmp0
.s1
== ' ') buf1
[1].s1
&= tmp1
.s1
;
2696 if (tmp0
.s0
== ' ') buf1
[2].s0
&= tmp1
.s0
;
2697 if (tmp0
.s1
== ' ') buf1
[2].s1
&= tmp1
.s1
;
2701 if (tmp0
.s0
== ' ') buf1
[3].s0
&= tmp1
.s0
;
2702 if (tmp0
.s1
== ' ') buf1
[3].s1
&= tmp1
.s1
;
2709 if (tmp0
.s0
== ' ') buf0
[0].s0
&= tmp1
.s0
;
2710 if (tmp0
.s1
== ' ') buf0
[0].s1
&= tmp1
.s1
;
2711 if (tmp0
.s2
== ' ') buf0
[0].s2
&= tmp1
.s2
;
2712 if (tmp0
.s3
== ' ') buf0
[0].s3
&= tmp1
.s3
;
2716 if (tmp0
.s0
== ' ') buf0
[1].s0
&= tmp1
.s0
;
2717 if (tmp0
.s1
== ' ') buf0
[1].s1
&= tmp1
.s1
;
2718 if (tmp0
.s2
== ' ') buf0
[1].s2
&= tmp1
.s2
;
2719 if (tmp0
.s3
== ' ') buf0
[1].s3
&= tmp1
.s3
;
2723 if (tmp0
.s0
== ' ') buf0
[2].s0
&= tmp1
.s0
;
2724 if (tmp0
.s1
== ' ') buf0
[2].s1
&= tmp1
.s1
;
2725 if (tmp0
.s2
== ' ') buf0
[2].s2
&= tmp1
.s2
;
2726 if (tmp0
.s3
== ' ') buf0
[2].s3
&= tmp1
.s3
;
2730 if (tmp0
.s0
== ' ') buf0
[3].s0
&= tmp1
.s0
;
2731 if (tmp0
.s1
== ' ') buf0
[3].s1
&= tmp1
.s1
;
2732 if (tmp0
.s2
== ' ') buf0
[3].s2
&= tmp1
.s2
;
2733 if (tmp0
.s3
== ' ') buf0
[3].s3
&= tmp1
.s3
;
2737 if (tmp0
.s0
== ' ') buf1
[0].s0
&= tmp1
.s0
;
2738 if (tmp0
.s1
== ' ') buf1
[0].s1
&= tmp1
.s1
;
2739 if (tmp0
.s2
== ' ') buf1
[0].s2
&= tmp1
.s2
;
2740 if (tmp0
.s3
== ' ') buf1
[0].s3
&= tmp1
.s3
;
2744 if (tmp0
.s0
== ' ') buf1
[1].s0
&= tmp1
.s0
;
2745 if (tmp0
.s1
== ' ') buf1
[1].s1
&= tmp1
.s1
;
2746 if (tmp0
.s2
== ' ') buf1
[1].s2
&= tmp1
.s2
;
2747 if (tmp0
.s3
== ' ') buf1
[1].s3
&= tmp1
.s3
;
2751 if (tmp0
.s0
== ' ') buf1
[2].s0
&= tmp1
.s0
;
2752 if (tmp0
.s1
== ' ') buf1
[2].s1
&= tmp1
.s1
;
2753 if (tmp0
.s2
== ' ') buf1
[2].s2
&= tmp1
.s2
;
2754 if (tmp0
.s3
== ' ') buf1
[2].s3
&= tmp1
.s3
;
2758 if (tmp0
.s0
== ' ') buf1
[3].s0
&= tmp1
.s0
;
2759 if (tmp0
.s1
== ' ') buf1
[3].s1
&= tmp1
.s1
;
2760 if (tmp0
.s2
== ' ') buf1
[3].s2
&= tmp1
.s2
;
2761 if (tmp0
.s3
== ' ') buf1
[3].s3
&= tmp1
.s3
;
2771 u32
apply_rule (const u32 name
, const u32 p0
, const u32 p1
, u32x buf0
[4], u32x buf1
[4], const u32 in_len
)
2773 u32 out_len
= in_len
;
2777 case RULE_OP_MANGLE_LREST
: out_len
= rule_op_mangle_lrest (p0
, p1
, buf0
, buf1
, out_len
); break;
2778 case RULE_OP_MANGLE_UREST
: out_len
= rule_op_mangle_urest (p0
, p1
, buf0
, buf1
, out_len
); break;
2779 case RULE_OP_MANGLE_LREST_UFIRST
: out_len
= rule_op_mangle_lrest_ufirst (p0
, p1
, buf0
, buf1
, out_len
); break;
2780 case RULE_OP_MANGLE_UREST_LFIRST
: out_len
= rule_op_mangle_urest_lfirst (p0
, p1
, buf0
, buf1
, out_len
); break;
2781 case RULE_OP_MANGLE_TREST
: out_len
= rule_op_mangle_trest (p0
, p1
, buf0
, buf1
, out_len
); break;
2782 case RULE_OP_MANGLE_TOGGLE_AT
: out_len
= rule_op_mangle_toggle_at (p0
, p1
, buf0
, buf1
, out_len
); break;
2783 case RULE_OP_MANGLE_REVERSE
: out_len
= rule_op_mangle_reverse (p0
, p1
, buf0
, buf1
, out_len
); break;
2784 case RULE_OP_MANGLE_DUPEWORD
: out_len
= rule_op_mangle_dupeword (p0
, p1
, buf0
, buf1
, out_len
); break;
2785 case RULE_OP_MANGLE_DUPEWORD_TIMES
: out_len
= rule_op_mangle_dupeword_times (p0
, p1
, buf0
, buf1
, out_len
); break;
2786 case RULE_OP_MANGLE_REFLECT
: out_len
= rule_op_mangle_reflect (p0
, p1
, buf0
, buf1
, out_len
); break;
2787 case RULE_OP_MANGLE_APPEND
: out_len
= rule_op_mangle_append (p0
, p1
, buf0
, buf1
, out_len
); break;
2788 case RULE_OP_MANGLE_PREPEND
: out_len
= rule_op_mangle_prepend (p0
, p1
, buf0
, buf1
, out_len
); break;
2789 case RULE_OP_MANGLE_ROTATE_LEFT
: out_len
= rule_op_mangle_rotate_left (p0
, p1
, buf0
, buf1
, out_len
); break;
2790 case RULE_OP_MANGLE_ROTATE_RIGHT
: out_len
= rule_op_mangle_rotate_right (p0
, p1
, buf0
, buf1
, out_len
); break;
2791 case RULE_OP_MANGLE_DELETE_FIRST
: out_len
= rule_op_mangle_delete_first (p0
, p1
, buf0
, buf1
, out_len
); break;
2792 case RULE_OP_MANGLE_DELETE_LAST
: out_len
= rule_op_mangle_delete_last (p0
, p1
, buf0
, buf1
, out_len
); break;
2793 case RULE_OP_MANGLE_DELETE_AT
: out_len
= rule_op_mangle_delete_at (p0
, p1
, buf0
, buf1
, out_len
); break;
2794 case RULE_OP_MANGLE_EXTRACT
: out_len
= rule_op_mangle_extract (p0
, p1
, buf0
, buf1
, out_len
); break;
2795 case RULE_OP_MANGLE_OMIT
: out_len
= rule_op_mangle_omit (p0
, p1
, buf0
, buf1
, out_len
); break;
2796 case RULE_OP_MANGLE_INSERT
: out_len
= rule_op_mangle_insert (p0
, p1
, buf0
, buf1
, out_len
); break;
2797 case RULE_OP_MANGLE_OVERSTRIKE
: out_len
= rule_op_mangle_overstrike (p0
, p1
, buf0
, buf1
, out_len
); break;
2798 case RULE_OP_MANGLE_TRUNCATE_AT
: out_len
= rule_op_mangle_truncate_at (p0
, p1
, buf0
, buf1
, out_len
); break;
2799 case RULE_OP_MANGLE_REPLACE
: out_len
= rule_op_mangle_replace (p0
, p1
, buf0
, buf1
, out_len
); break;
2800 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
2801 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
2802 case RULE_OP_MANGLE_DUPECHAR_FIRST
: out_len
= rule_op_mangle_dupechar_first (p0
, p1
, buf0
, buf1
, out_len
); break;
2803 case RULE_OP_MANGLE_DUPECHAR_LAST
: out_len
= rule_op_mangle_dupechar_last (p0
, p1
, buf0
, buf1
, out_len
); break;
2804 case RULE_OP_MANGLE_DUPECHAR_ALL
: out_len
= rule_op_mangle_dupechar_all (p0
, p1
, buf0
, buf1
, out_len
); break;
2805 case RULE_OP_MANGLE_SWITCH_FIRST
: out_len
= rule_op_mangle_switch_first (p0
, p1
, buf0
, buf1
, out_len
); break;
2806 case RULE_OP_MANGLE_SWITCH_LAST
: out_len
= rule_op_mangle_switch_last (p0
, p1
, buf0
, buf1
, out_len
); break;
2807 case RULE_OP_MANGLE_SWITCH_AT
: out_len
= rule_op_mangle_switch_at (p0
, p1
, buf0
, buf1
, out_len
); break;
2808 case RULE_OP_MANGLE_CHR_SHIFTL
: out_len
= rule_op_mangle_chr_shiftl (p0
, p1
, buf0
, buf1
, out_len
); break;
2809 case RULE_OP_MANGLE_CHR_SHIFTR
: out_len
= rule_op_mangle_chr_shiftr (p0
, p1
, buf0
, buf1
, out_len
); break;
2810 case RULE_OP_MANGLE_CHR_INCR
: out_len
= rule_op_mangle_chr_incr (p0
, p1
, buf0
, buf1
, out_len
); break;
2811 case RULE_OP_MANGLE_CHR_DECR
: out_len
= rule_op_mangle_chr_decr (p0
, p1
, buf0
, buf1
, out_len
); break;
2812 case RULE_OP_MANGLE_REPLACE_NP1
: out_len
= rule_op_mangle_replace_np1 (p0
, p1
, buf0
, buf1
, out_len
); break;
2813 case RULE_OP_MANGLE_REPLACE_NM1
: out_len
= rule_op_mangle_replace_nm1 (p0
, p1
, buf0
, buf1
, out_len
); break;
2814 case RULE_OP_MANGLE_DUPEBLOCK_FIRST
: out_len
= rule_op_mangle_dupeblock_first (p0
, p1
, buf0
, buf1
, out_len
); break;
2815 case RULE_OP_MANGLE_DUPEBLOCK_LAST
: out_len
= rule_op_mangle_dupeblock_last (p0
, p1
, buf0
, buf1
, out_len
); break;
2816 case RULE_OP_MANGLE_TITLE
: out_len
= rule_op_mangle_title (p0
, p1
, buf0
, buf1
, out_len
); break;
2822 u32
apply_rules (__global u32
*cmds
, u32x buf0
[4], u32x buf1
[4], const u32 len
)
2826 for (u32 i
= 0; cmds
[i
] != 0; i
++)
2828 const u32 cmd
= cmds
[i
];
2830 const u32 name
= (cmd
>> 0) & 0xff;
2831 const u32 p0
= (cmd
>> 8) & 0xff;
2832 const u32 p1
= (cmd
>> 16) & 0xff;
2834 out_len
= apply_rule (name
, p0
, p1
, buf0
, buf1
, out_len
);