2 * Author......: Jens Steube <jens.steube@gmail.com>
/*
 * Forward declaration of apply_rule(); the definition is not part of this
 * section of the file.
 *
 * buf0 and buf1 are declared as 4-element u32 arrays and are not
 * const-qualified, so the callee may modify them in place.
 *
 * NOTE(review): name/p0/p1 look like a rule opcode plus its two operands,
 * in_len like the current length of the buffer, and the return value like the
 * resulting length - confirm against the definition.
 */
u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
/*
 * Forward declaration of apply_rules(); the definition is not part of this
 * section of the file.
 *
 * cmds points to read-only u32 data in the __global address space.
 * buf0 and buf1 are declared as 4-element u32 arrays and are not
 * const-qualified, so the callee may modify them in place.
 *
 * NOTE(review): presumably cmds is a list of encoded rule commands applied in
 * sequence, len the current length and the return value the resulting
 * length - confirm against the definition.
 */
u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
/*
 * Forward declaration of apply_rules_vect(); the definition is not part of
 * this section of the file.
 *
 * pw_buf0, pw_buf1 and pw_len are const inputs; rules_buf points to read-only
 * kernel_rule_t entries in the __global address space; w0 and w1 are
 * non-const 4-element arrays of u32x (a type declared elsewhere) and can
 * therefore be written by the callee.
 *
 * NOTE(review): presumably il_pos selects the entry (or first entry) of
 * rules_buf to apply, w0/w1 receive the transformed candidate(s) and the
 * return value is the resulting length - confirm against the definition.
 */
u32 apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
/*
 * Build a per-byte case mask for the four bytes packed in buf.
 *
 * Each byte lane of the result is 0x20 when the corresponding byte of buf is
 * an ASCII letter (0x41..0x5a or 0x61..0x7a) and 0x00 otherwise. 0x20 is the
 * bit in which upper- and lower-case ASCII letters differ.
 *
 * All four lanes are evaluated at once; none of the additions below can carry
 * into the neighbouring lane (the largest per-lane sum is 0x1f + 0x1f = 0x3e).
 */
static u32 generate_cmask (u32 buf)
{
  // 0x20 in every lane whose byte has bit 6 set and bit 7 clear (0x40..0x7f)
  const u32 rmask = ((buf & 0x40404040) >> 1)
                  & ~((buf & 0x80808080) >> 2);

  // bit 5 of hmask is set when the low five bits are >= 27 (above 'Z' / 'z')
  const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;

  // bit 5 of lmask is set when the low five bits are >= 1 (excludes '@' / '`')
  const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;

  return rmask & ~hmask & lmask;
}
21 static void truncate_right (u32 w0
[4], u32 w1
[4], const u32 len
)
23 const u32 tmp
= (1 << ((len
% 4) * 8)) - 1;
74 static void truncate_left (u32 w0
[4], u32 w1
[4], const u32 len
)
76 const u32 tmp
= ~((1 << ((len
% 4) * 8)) - 1);
/*
 * Shift the eight-word block in0[0..3], in1[0..3] by one byte position and
 * store the result in out0[0..3], out1[0..3].
 *
 * Every output word is assembled from the input word at the same index and
 * its successor; the last word (out1[3]) is paired with a literal 0, so a
 * zero byte is shifted in at the end of the block.
 *
 * The words are written in ascending order. Each store therefore only touches
 * an index that none of the following statements reads any more; keep this
 * order if callers pass the same arrays as input and output.
 *
 * NOTE(review): __byte_perm_S and amd_bytealign_S are defined elsewhere.
 * Assuming they follow CUDA __byte_perm and cl_amd_media_ops amd_bytealign
 * semantics, both variants compute (in[i] >> 8) | (in[i + 1] << 24).
 *
 * NOTE(review): the first guard was restored as IS_NV to match the
 * IS_AMD / IS_GENERIC guard of the second variant - confirm the macro name.
 */
static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
{
  #ifdef IS_NV
  out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
  out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
  out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
  out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
  out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
  out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
  out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
  out1[3] = __byte_perm_S (in1[3],      0, 0x4321);
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
  out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
  out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
  out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
  out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
  out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
  out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
  out1[3] = amd_bytealign_S (     0, in1[3], 1);
  #endif
}
/*
 * Shift the eight-word block in0[0..3], in1[0..3] by one byte position in the
 * direction opposite to lshift_block() and store the result in out0[0..3],
 * out1[0..3].
 *
 * Every output word is assembled from the input word at the same index and
 * its predecessor; the first word (out0[0]) is paired with a literal 0, so a
 * zero byte is shifted in at the start of the block.
 *
 * The words are written in descending order. Each store therefore only
 * touches an index that none of the following statements reads any more;
 * keep this order if callers pass the same arrays as input and output.
 *
 * NOTE(review): __byte_perm_S and amd_bytealign_S are defined elsewhere.
 * Assuming they follow CUDA __byte_perm and cl_amd_media_ops amd_bytealign
 * semantics, both variants compute (in[i - 1] >> 24) | (in[i] << 8).
 *
 * NOTE(review): the first guard was restored as IS_NV to match the
 * IS_AMD / IS_GENERIC guard of the second variant - confirm the macro name.
 */
static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
{
  #ifdef IS_NV
  out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
  out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
  out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
  out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
  out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
  out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
  out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
  out0[0] = __byte_perm_S (     0, in0[0], 0x6543);
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
  out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
  out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
  out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
  out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
  out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
  out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
  out0[0] = amd_bytealign_S (in0[0],      0, 3);
  #endif
}
177 static void lshift_block_N (const u32 in0
[4], const u32 in1
[4], u32 out0
[4], u32 out1
[4], const u32 num
)
182 case 0: out0
[0] = in0
[0];
191 case 1: out0
[0] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
192 out0
[1] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
193 out0
[2] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
194 out0
[3] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
195 out1
[0] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
196 out1
[1] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
197 out1
[2] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
198 out1
[3] = __byte_perm_S (in1
[3], 0, 0x4321);
200 case 2: out0
[0] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
201 out0
[1] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
202 out0
[2] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
203 out0
[3] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
204 out1
[0] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
205 out1
[1] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
206 out1
[2] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
207 out1
[3] = __byte_perm_S (in1
[3], 0, 0x5432);
209 case 3: out0
[0] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
210 out0
[1] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
211 out0
[2] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
212 out0
[3] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
213 out1
[0] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
214 out1
[1] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
215 out1
[2] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
216 out1
[3] = __byte_perm_S (in1
[3], 0, 0x6543);
218 case 4: out0
[0] = in0
[1];
227 case 5: out0
[0] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
228 out0
[1] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
229 out0
[2] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
230 out0
[3] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
231 out1
[0] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
232 out1
[1] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
233 out1
[2] = __byte_perm_S (in1
[3], 0, 0x4321);
236 case 6: out0
[0] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
237 out0
[1] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
238 out0
[2] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
239 out0
[3] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
240 out1
[0] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
241 out1
[1] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
242 out1
[2] = __byte_perm_S (in1
[3], 0, 0x5432);
245 case 7: out0
[0] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
246 out0
[1] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
247 out0
[2] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
248 out0
[3] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
249 out1
[0] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
250 out1
[1] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
251 out1
[2] = __byte_perm_S (in1
[3], 0, 0x6543);
254 case 8: out0
[0] = in0
[2];
263 case 9: out0
[0] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
264 out0
[1] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
265 out0
[2] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
266 out0
[3] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
267 out1
[0] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
268 out1
[1] = __byte_perm_S (in1
[3], 0, 0x4321);
272 case 10: out0
[0] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
273 out0
[1] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
274 out0
[2] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
275 out0
[3] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
276 out1
[0] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
277 out1
[1] = __byte_perm_S (in1
[3], 0, 0x5432);
281 case 11: out0
[0] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
282 out0
[1] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
283 out0
[2] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
284 out0
[3] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
285 out1
[0] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
286 out1
[1] = __byte_perm_S (in1
[3], 0, 0x6543);
290 case 12: out0
[0] = in0
[3];
300 out0
[0] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
301 out0
[1] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
302 out0
[2] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
303 out0
[3] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
304 out1
[0] = __byte_perm_S (in1
[3], 0, 0x4321);
309 case 14: out0
[0] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
310 out0
[1] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
311 out0
[2] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
312 out0
[3] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
313 out1
[0] = __byte_perm_S (in1
[3], 0, 0x5432);
318 case 15: out0
[0] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
319 out0
[1] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
320 out0
[2] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
321 out0
[3] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
322 out1
[0] = __byte_perm_S (in1
[3], 0, 0x6543);
327 case 16: out0
[0] = in1
[0];
336 case 17: out0
[0] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
337 out0
[1] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
338 out0
[2] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
339 out0
[3] = __byte_perm_S (in1
[3], 0, 0x4321);
345 case 18: out0
[0] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
346 out0
[1] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
347 out0
[2] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
348 out0
[3] = __byte_perm_S (in1
[3], 0, 0x5432);
354 case 19: out0
[0] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
355 out0
[1] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
356 out0
[2] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
357 out0
[3] = __byte_perm_S (in1
[3], 0, 0x6543);
363 case 20: out0
[0] = in1
[1];
372 case 21: out0
[0] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
373 out0
[1] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
374 out0
[2] = __byte_perm_S (in1
[3], 0, 0x4321);
381 case 22: out0
[0] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
382 out0
[1] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
383 out0
[2] = __byte_perm_S (in1
[3], 0, 0x5432);
390 case 23: out0
[0] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
391 out0
[1] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
392 out0
[2] = __byte_perm_S (in1
[3], 0, 0x6543);
399 case 24: out0
[0] = in1
[2];
408 case 25: out0
[0] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
409 out0
[1] = __byte_perm_S (in1
[3], 0, 0x4321);
417 case 26: out0
[0] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
418 out0
[1] = __byte_perm_S (in1
[3], 0, 0x5432);
426 case 27: out0
[0] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
427 out0
[1] = __byte_perm_S (in1
[3], 0, 0x6543);
435 case 28: out0
[0] = in1
[3];
444 case 29: out0
[0] = __byte_perm_S (in1
[3], 0, 0x4321);
453 case 30: out0
[0] = __byte_perm_S (in1
[3], 0, 0x5432);
462 case 31: out0
[0] = __byte_perm_S (in1
[3], 0, 0x6543);
474 #if defined IS_AMD || defined IS_GENERIC
477 case 0: out0
[0] = in0
[0];
486 case 1: out0
[0] = amd_bytealign_S (in0
[1], in0
[0], 1);
487 out0
[1] = amd_bytealign_S (in0
[2], in0
[1], 1);
488 out0
[2] = amd_bytealign_S (in0
[3], in0
[2], 1);
489 out0
[3] = amd_bytealign_S (in1
[0], in0
[3], 1);
490 out1
[0] = amd_bytealign_S (in1
[1], in1
[0], 1);
491 out1
[1] = amd_bytealign_S (in1
[2], in1
[1], 1);
492 out1
[2] = amd_bytealign_S (in1
[3], in1
[2], 1);
493 out1
[3] = amd_bytealign_S ( 0, in1
[3], 1);
495 case 2: out0
[0] = amd_bytealign_S (in0
[1], in0
[0], 2);
496 out0
[1] = amd_bytealign_S (in0
[2], in0
[1], 2);
497 out0
[2] = amd_bytealign_S (in0
[3], in0
[2], 2);
498 out0
[3] = amd_bytealign_S (in1
[0], in0
[3], 2);
499 out1
[0] = amd_bytealign_S (in1
[1], in1
[0], 2);
500 out1
[1] = amd_bytealign_S (in1
[2], in1
[1], 2);
501 out1
[2] = amd_bytealign_S (in1
[3], in1
[2], 2);
502 out1
[3] = amd_bytealign_S ( 0, in1
[3], 2);
504 case 3: out0
[0] = amd_bytealign_S (in0
[1], in0
[0], 3);
505 out0
[1] = amd_bytealign_S (in0
[2], in0
[1], 3);
506 out0
[2] = amd_bytealign_S (in0
[3], in0
[2], 3);
507 out0
[3] = amd_bytealign_S (in1
[0], in0
[3], 3);
508 out1
[0] = amd_bytealign_S (in1
[1], in1
[0], 3);
509 out1
[1] = amd_bytealign_S (in1
[2], in1
[1], 3);
510 out1
[2] = amd_bytealign_S (in1
[3], in1
[2], 3);
511 out1
[3] = amd_bytealign_S ( 0, in1
[3], 3);
513 case 4: out0
[0] = in0
[1];
522 case 5: out0
[0] = amd_bytealign_S (in0
[2], in0
[1], 1);
523 out0
[1] = amd_bytealign_S (in0
[3], in0
[2], 1);
524 out0
[2] = amd_bytealign_S (in1
[0], in0
[3], 1);
525 out0
[3] = amd_bytealign_S (in1
[1], in1
[0], 1);
526 out1
[0] = amd_bytealign_S (in1
[2], in1
[1], 1);
527 out1
[1] = amd_bytealign_S (in1
[3], in1
[2], 1);
528 out1
[2] = amd_bytealign_S ( 0, in1
[3], 1);
531 case 6: out0
[0] = amd_bytealign_S (in0
[2], in0
[1], 2);
532 out0
[1] = amd_bytealign_S (in0
[3], in0
[2], 2);
533 out0
[2] = amd_bytealign_S (in1
[0], in0
[3], 2);
534 out0
[3] = amd_bytealign_S (in1
[1], in1
[0], 2);
535 out1
[0] = amd_bytealign_S (in1
[2], in1
[1], 2);
536 out1
[1] = amd_bytealign_S (in1
[3], in1
[2], 2);
537 out1
[2] = amd_bytealign_S ( 0, in1
[3], 2);
540 case 7: out0
[0] = amd_bytealign_S (in0
[2], in0
[1], 3);
541 out0
[1] = amd_bytealign_S (in0
[3], in0
[2], 3);
542 out0
[2] = amd_bytealign_S (in1
[0], in0
[3], 3);
543 out0
[3] = amd_bytealign_S (in1
[1], in1
[0], 3);
544 out1
[0] = amd_bytealign_S (in1
[2], in1
[1], 3);
545 out1
[1] = amd_bytealign_S (in1
[3], in1
[2], 3);
546 out1
[2] = amd_bytealign_S ( 0, in1
[3], 3);
549 case 8: out0
[0] = in0
[2];
558 case 9: out0
[0] = amd_bytealign_S (in0
[3], in0
[2], 1);
559 out0
[1] = amd_bytealign_S (in1
[0], in0
[3], 1);
560 out0
[2] = amd_bytealign_S (in1
[1], in1
[0], 1);
561 out0
[3] = amd_bytealign_S (in1
[2], in1
[1], 1);
562 out1
[0] = amd_bytealign_S (in1
[3], in1
[2], 1);
563 out1
[1] = amd_bytealign_S ( 0, in1
[3], 1);
567 case 10: out0
[0] = amd_bytealign_S (in0
[3], in0
[2], 2);
568 out0
[1] = amd_bytealign_S (in1
[0], in0
[3], 2);
569 out0
[2] = amd_bytealign_S (in1
[1], in1
[0], 2);
570 out0
[3] = amd_bytealign_S (in1
[2], in1
[1], 2);
571 out1
[0] = amd_bytealign_S (in1
[3], in1
[2], 2);
572 out1
[1] = amd_bytealign_S ( 0, in1
[3], 2);
576 case 11: out0
[0] = amd_bytealign_S (in0
[3], in0
[2], 3);
577 out0
[1] = amd_bytealign_S (in1
[0], in0
[3], 3);
578 out0
[2] = amd_bytealign_S (in1
[1], in1
[0], 3);
579 out0
[3] = amd_bytealign_S (in1
[2], in1
[1], 3);
580 out1
[0] = amd_bytealign_S (in1
[3], in1
[2], 3);
581 out1
[1] = amd_bytealign_S ( 0, in1
[3], 3);
585 case 12: out0
[0] = in0
[3];
594 case 13: out0
[0] = amd_bytealign_S (in1
[0], in0
[3], 1);
595 out0
[1] = amd_bytealign_S (in1
[1], in1
[0], 1);
596 out0
[2] = amd_bytealign_S (in1
[2], in1
[1], 1);
597 out0
[3] = amd_bytealign_S (in1
[3], in1
[2], 1);
598 out1
[0] = amd_bytealign_S ( 0, in1
[3], 1);
603 case 14: out0
[0] = amd_bytealign_S (in1
[0], in0
[3], 2);
604 out0
[1] = amd_bytealign_S (in1
[1], in1
[0], 2);
605 out0
[2] = amd_bytealign_S (in1
[2], in1
[1], 2);
606 out0
[3] = amd_bytealign_S (in1
[3], in1
[2], 2);
607 out1
[0] = amd_bytealign_S ( 0, in1
[3], 2);
612 case 15: out0
[0] = amd_bytealign_S (in1
[0], in0
[3], 3);
613 out0
[1] = amd_bytealign_S (in1
[1], in1
[0], 3);
614 out0
[2] = amd_bytealign_S (in1
[2], in1
[1], 3);
615 out0
[3] = amd_bytealign_S (in1
[3], in1
[2], 3);
616 out1
[0] = amd_bytealign_S ( 0, in1
[3], 3);
621 case 16: out0
[0] = in1
[0];
630 case 17: out0
[0] = amd_bytealign_S (in1
[1], in1
[0], 1);
631 out0
[1] = amd_bytealign_S (in1
[2], in1
[1], 1);
632 out0
[2] = amd_bytealign_S (in1
[3], in1
[2], 1);
633 out0
[3] = amd_bytealign_S ( 0, in1
[3], 1);
639 case 18: out0
[0] = amd_bytealign_S (in1
[1], in1
[0], 2);
640 out0
[1] = amd_bytealign_S (in1
[2], in1
[1], 2);
641 out0
[2] = amd_bytealign_S (in1
[3], in1
[2], 2);
642 out0
[3] = amd_bytealign_S ( 0, in1
[3], 2);
648 case 19: out0
[0] = amd_bytealign_S (in1
[1], in1
[0], 3);
649 out0
[1] = amd_bytealign_S (in1
[2], in1
[1], 3);
650 out0
[2] = amd_bytealign_S (in1
[3], in1
[2], 3);
651 out0
[3] = amd_bytealign_S ( 0, in1
[3], 3);
657 case 20: out0
[0] = in1
[1];
666 case 21: out0
[0] = amd_bytealign_S (in1
[2], in1
[1], 1);
667 out0
[1] = amd_bytealign_S (in1
[3], in1
[2], 1);
668 out0
[2] = amd_bytealign_S ( 0, in1
[3], 1);
675 case 22: out0
[0] = amd_bytealign_S (in1
[2], in1
[1], 2);
676 out0
[1] = amd_bytealign_S (in1
[3], in1
[2], 2);
677 out0
[2] = amd_bytealign_S ( 0, in1
[3], 2);
684 case 23: out0
[0] = amd_bytealign_S (in1
[2], in1
[1], 3);
685 out0
[1] = amd_bytealign_S (in1
[3], in1
[2], 3);
686 out0
[2] = amd_bytealign_S ( 0, in1
[3], 3);
693 case 24: out0
[0] = in1
[2];
702 case 25: out0
[0] = amd_bytealign_S (in1
[3], in1
[2], 1);
703 out0
[1] = amd_bytealign_S ( 0, in1
[3], 1);
711 case 26: out0
[0] = amd_bytealign_S (in1
[3], in1
[2], 2);
712 out0
[1] = amd_bytealign_S ( 0, in1
[3], 2);
720 case 27: out0
[0] = amd_bytealign_S (in1
[3], in1
[2], 3);
721 out0
[1] = amd_bytealign_S ( 0, in1
[3], 3);
729 case 28: out0
[0] = in1
[3];
738 case 29: out0
[0] = amd_bytealign_S ( 0, in1
[3], 1);
747 case 30: out0
[0] = amd_bytealign_S ( 0, in1
[3], 2);
756 case 31: out0
[0] = amd_bytealign_S ( 0, in1
[3], 3);
769 static void rshift_block_N (const u32 in0
[4], const u32 in1
[4], u32 out0
[4], u32 out1
[4], const u32 num
)
774 case 0: out1
[3] = in1
[3];
783 case 1: out1
[3] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
784 out1
[2] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
785 out1
[1] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
786 out1
[0] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
787 out0
[3] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
788 out0
[2] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
789 out0
[1] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
790 out0
[0] = __byte_perm_S ( 0, in0
[0], 0x6543);
792 case 2: out1
[3] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
793 out1
[2] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
794 out1
[1] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
795 out1
[0] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
796 out0
[3] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
797 out0
[2] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
798 out0
[1] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
799 out0
[0] = __byte_perm_S ( 0, in0
[0], 0x5432);
801 case 3: out1
[3] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
802 out1
[2] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
803 out1
[1] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
804 out1
[0] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
805 out0
[3] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
806 out0
[2] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
807 out0
[1] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
808 out0
[0] = __byte_perm_S ( 0, in0
[0], 0x4321);
810 case 4: out1
[3] = in1
[2];
819 case 5: out1
[3] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
820 out1
[2] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
821 out1
[1] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
822 out1
[0] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
823 out0
[3] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
824 out0
[2] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
825 out0
[1] = __byte_perm_S ( 0, in0
[0], 0x6543);
828 case 6: out1
[3] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
829 out1
[2] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
830 out1
[1] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
831 out1
[0] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
832 out0
[3] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
833 out0
[2] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
834 out0
[1] = __byte_perm_S ( 0, in0
[0], 0x5432);
837 case 7: out1
[3] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
838 out1
[2] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
839 out1
[1] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
840 out1
[0] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
841 out0
[3] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
842 out0
[2] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
843 out0
[1] = __byte_perm_S ( 0, in0
[0], 0x4321);
846 case 8: out1
[3] = in1
[1];
855 case 9: out1
[3] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
856 out1
[2] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
857 out1
[1] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
858 out1
[0] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
859 out0
[3] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
860 out0
[2] = __byte_perm_S ( 0, in0
[0], 0x6543);
864 case 10: out1
[3] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
865 out1
[2] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
866 out1
[1] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
867 out1
[0] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
868 out0
[3] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
869 out0
[2] = __byte_perm_S ( 0, in0
[0], 0x5432);
873 case 11: out1
[3] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
874 out1
[2] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
875 out1
[1] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
876 out1
[0] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
877 out0
[3] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
878 out0
[2] = __byte_perm_S ( 0, in0
[0], 0x4321);
882 case 12: out1
[3] = in1
[0];
891 case 13: out1
[3] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
892 out1
[2] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
893 out1
[1] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
894 out1
[0] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
895 out0
[3] = __byte_perm_S ( 0, in0
[0], 0x6543);
900 case 14: out1
[3] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
901 out1
[2] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
902 out1
[1] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
903 out1
[0] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
904 out0
[3] = __byte_perm_S ( 0, in0
[0], 0x5432);
909 case 15: out1
[3] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
910 out1
[2] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
911 out1
[1] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
912 out1
[0] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
913 out0
[3] = __byte_perm_S ( 0, in0
[0], 0x4321);
918 case 16: out1
[3] = in0
[3];
927 case 17: out1
[3] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
928 out1
[2] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
929 out1
[1] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
930 out1
[0] = __byte_perm_S ( 0, in0
[0], 0x6543);
936 case 18: out1
[3] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
937 out1
[2] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
938 out1
[1] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
939 out1
[0] = __byte_perm_S ( 0, in0
[0], 0x5432);
945 case 19: out1
[3] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
946 out1
[2] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
947 out1
[1] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
948 out1
[0] = __byte_perm_S ( 0, in0
[0], 0x4321);
954 case 20: out1
[3] = in0
[2];
963 case 21: out1
[3] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
964 out1
[2] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
965 out1
[1] = __byte_perm_S ( 0, in0
[0], 0x6543);
972 case 22: out1
[3] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
973 out1
[2] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
974 out1
[1] = __byte_perm_S ( 0, in0
[0], 0x5432);
981 case 23: out1
[3] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
982 out1
[2] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
983 out1
[1] = __byte_perm_S ( 0, in0
[0], 0x4321);
990 case 24: out1
[3] = in0
[1];
999 case 25: out1
[3] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
1000 out1
[2] = __byte_perm_S ( 0, in0
[0], 0x6543);
1008 case 26: out1
[3] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
1009 out1
[2] = __byte_perm_S ( 0, in0
[0], 0x5432);
1017 case 27: out1
[3] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
1018 out1
[2] = __byte_perm_S ( 0, in0
[0], 0x4321);
1026 case 28: out1
[3] = in0
[0];
1035 case 29: out1
[3] = __byte_perm_S ( 0, in0
[0], 0x6543);
1044 case 30: out1
[3] = __byte_perm_S ( 0, in0
[0], 0x5432);
1053 case 31: out1
[3] = __byte_perm_S ( 0, in0
[0], 0x4321);
1065 #if defined IS_AMD || defined IS_GENERIC
1068 case 0: out1
[3] = in1
[3];
1077 case 1: out1
[3] = amd_bytealign_S (in1
[3], in1
[2], 3);
1078 out1
[2] = amd_bytealign_S (in1
[2], in1
[1], 3);
1079 out1
[1] = amd_bytealign_S (in1
[1], in1
[0], 3);
1080 out1
[0] = amd_bytealign_S (in1
[0], in0
[3], 3);
1081 out0
[3] = amd_bytealign_S (in0
[3], in0
[2], 3);
1082 out0
[2] = amd_bytealign_S (in0
[2], in0
[1], 3);
1083 out0
[1] = amd_bytealign_S (in0
[1], in0
[0], 3);
1084 out0
[0] = amd_bytealign_S (in0
[0], 0, 3);
1086 case 2: out1
[3] = amd_bytealign_S (in1
[3], in1
[2], 2);
1087 out1
[2] = amd_bytealign_S (in1
[2], in1
[1], 2);
1088 out1
[1] = amd_bytealign_S (in1
[1], in1
[0], 2);
1089 out1
[0] = amd_bytealign_S (in1
[0], in0
[3], 2);
1090 out0
[3] = amd_bytealign_S (in0
[3], in0
[2], 2);
1091 out0
[2] = amd_bytealign_S (in0
[2], in0
[1], 2);
1092 out0
[1] = amd_bytealign_S (in0
[1], in0
[0], 2);
1093 out0
[0] = amd_bytealign_S (in0
[0], 0, 2);
1095 case 3: out1
[3] = amd_bytealign_S (in1
[3], in1
[2], 1);
1096 out1
[2] = amd_bytealign_S (in1
[2], in1
[1], 1);
1097 out1
[1] = amd_bytealign_S (in1
[1], in1
[0], 1);
1098 out1
[0] = amd_bytealign_S (in1
[0], in0
[3], 1);
1099 out0
[3] = amd_bytealign_S (in0
[3], in0
[2], 1);
1100 out0
[2] = amd_bytealign_S (in0
[2], in0
[1], 1);
1101 out0
[1] = amd_bytealign_S (in0
[1], in0
[0], 1);
1102 out0
[0] = amd_bytealign_S (in0
[0], 0, 1);
1104 case 4: out1
[3] = in1
[2];
1113 case 5: out1
[3] = amd_bytealign_S (in1
[2], in1
[1], 3);
1114 out1
[2] = amd_bytealign_S (in1
[1], in1
[0], 3);
1115 out1
[1] = amd_bytealign_S (in1
[0], in0
[3], 3);
1116 out1
[0] = amd_bytealign_S (in0
[3], in0
[2], 3);
1117 out0
[3] = amd_bytealign_S (in0
[2], in0
[1], 3);
1118 out0
[2] = amd_bytealign_S (in0
[1], in0
[0], 3);
1119 out0
[1] = amd_bytealign_S (in0
[0], 0, 3);
1122 case 6: out1
[3] = amd_bytealign_S (in1
[2], in1
[1], 2);
1123 out1
[2] = amd_bytealign_S (in1
[1], in1
[0], 2);
1124 out1
[1] = amd_bytealign_S (in1
[0], in0
[3], 2);
1125 out1
[0] = amd_bytealign_S (in0
[3], in0
[2], 2);
1126 out0
[3] = amd_bytealign_S (in0
[2], in0
[1], 2);
1127 out0
[2] = amd_bytealign_S (in0
[1], in0
[0], 2);
1128 out0
[1] = amd_bytealign_S (in0
[0], 0, 2);
1131 case 7: out1
[3] = amd_bytealign_S (in1
[2], in1
[1], 1);
1132 out1
[2] = amd_bytealign_S (in1
[1], in1
[0], 1);
1133 out1
[1] = amd_bytealign_S (in1
[0], in0
[3], 1);
1134 out1
[0] = amd_bytealign_S (in0
[3], in0
[2], 1);
1135 out0
[3] = amd_bytealign_S (in0
[2], in0
[1], 1);
1136 out0
[2] = amd_bytealign_S (in0
[1], in0
[0], 1);
1137 out0
[1] = amd_bytealign_S (in0
[0], 0, 1);
1140 case 8: out1
[3] = in1
[1];
1149 case 9: out1
[3] = amd_bytealign_S (in1
[1], in1
[0], 3);
1150 out1
[2] = amd_bytealign_S (in1
[0], in0
[3], 3);
1151 out1
[1] = amd_bytealign_S (in0
[3], in0
[2], 3);
1152 out1
[0] = amd_bytealign_S (in0
[2], in0
[1], 3);
1153 out0
[3] = amd_bytealign_S (in0
[1], in0
[0], 3);
1154 out0
[2] = amd_bytealign_S (in0
[0], 0, 3);
1158 case 10: out1
[3] = amd_bytealign_S (in1
[1], in1
[0], 2);
1159 out1
[2] = amd_bytealign_S (in1
[0], in0
[3], 2);
1160 out1
[1] = amd_bytealign_S (in0
[3], in0
[2], 2);
1161 out1
[0] = amd_bytealign_S (in0
[2], in0
[1], 2);
1162 out0
[3] = amd_bytealign_S (in0
[1], in0
[0], 2);
1163 out0
[2] = amd_bytealign_S (in0
[0], 0, 2);
1167 case 11: out1
[3] = amd_bytealign_S (in1
[1], in1
[0], 1);
1168 out1
[2] = amd_bytealign_S (in1
[0], in0
[3], 1);
1169 out1
[1] = amd_bytealign_S (in0
[3], in0
[2], 1);
1170 out1
[0] = amd_bytealign_S (in0
[2], in0
[1], 1);
1171 out0
[3] = amd_bytealign_S (in0
[1], in0
[0], 1);
1172 out0
[2] = amd_bytealign_S (in0
[0], 0, 1);
1176 case 12: out1
[3] = in1
[0];
1185 case 13: out1
[3] = amd_bytealign_S (in1
[0], in0
[3], 3);
1186 out1
[2] = amd_bytealign_S (in0
[3], in0
[2], 3);
1187 out1
[1] = amd_bytealign_S (in0
[2], in0
[1], 3);
1188 out1
[0] = amd_bytealign_S (in0
[1], in0
[0], 3);
1189 out0
[3] = amd_bytealign_S (in0
[0], 0, 3);
1194 case 14: out1
[3] = amd_bytealign_S (in1
[0], in0
[3], 2);
1195 out1
[2] = amd_bytealign_S (in0
[3], in0
[2], 2);
1196 out1
[1] = amd_bytealign_S (in0
[2], in0
[1], 2);
1197 out1
[0] = amd_bytealign_S (in0
[1], in0
[0], 2);
1198 out0
[3] = amd_bytealign_S (in0
[0], 0, 2);
1203 case 15: out1
[3] = amd_bytealign_S (in1
[0], in0
[3], 1);
1204 out1
[2] = amd_bytealign_S (in0
[3], in0
[2], 1);
1205 out1
[1] = amd_bytealign_S (in0
[2], in0
[1], 1);
1206 out1
[0] = amd_bytealign_S (in0
[1], in0
[0], 1);
1207 out0
[3] = amd_bytealign_S (in0
[0], 0, 1);
1212 case 16: out1
[3] = in0
[3];
1221 case 17: out1
[3] = amd_bytealign_S (in0
[3], in0
[2], 3);
1222 out1
[2] = amd_bytealign_S (in0
[2], in0
[1], 3);
1223 out1
[1] = amd_bytealign_S (in0
[1], in0
[0], 3);
1224 out1
[0] = amd_bytealign_S (in0
[0], 0, 3);
1230 case 18: out1
[3] = amd_bytealign_S (in0
[3], in0
[2], 2);
1231 out1
[2] = amd_bytealign_S (in0
[2], in0
[1], 2);
1232 out1
[1] = amd_bytealign_S (in0
[1], in0
[0], 2);
1233 out1
[0] = amd_bytealign_S (in0
[0], 0, 2);
1239 case 19: out1
[3] = amd_bytealign_S (in0
[3], in0
[2], 1);
1240 out1
[2] = amd_bytealign_S (in0
[2], in0
[1], 1);
1241 out1
[1] = amd_bytealign_S (in0
[1], in0
[0], 1);
1242 out1
[0] = amd_bytealign_S (in0
[0], 0, 1);
1248 case 20: out1
[3] = in0
[2];
1257 case 21: out1
[3] = amd_bytealign_S (in0
[2], in0
[1], 3);
1258 out1
[2] = amd_bytealign_S (in0
[1], in0
[0], 3);
1259 out1
[1] = amd_bytealign_S (in0
[0], 0, 3);
1266 case 22: out1
[3] = amd_bytealign_S (in0
[2], in0
[1], 2);
1267 out1
[2] = amd_bytealign_S (in0
[1], in0
[0], 2);
1268 out1
[1] = amd_bytealign_S (in0
[0], 0, 2);
1275 case 23: out1
[3] = amd_bytealign_S (in0
[2], in0
[1], 1);
1276 out1
[2] = amd_bytealign_S (in0
[1], in0
[0], 1);
1277 out1
[1] = amd_bytealign_S (in0
[0], 0, 1);
1284 case 24: out1
[3] = in0
[1];
1293 case 25: out1
[3] = amd_bytealign_S (in0
[1], in0
[0], 3);
1294 out1
[2] = amd_bytealign_S (in0
[0], 0, 3);
1302 case 26: out1
[3] = amd_bytealign_S (in0
[1], in0
[0], 2);
1303 out1
[2] = amd_bytealign_S (in0
[0], 0, 2);
1311 case 27: out1
[3] = amd_bytealign_S (in0
[1], in0
[0], 1);
1312 out1
[2] = amd_bytealign_S (in0
[0], 0, 1);
1320 case 28: out1
[3] = in0
[0];
1329 case 29: out1
[3] = amd_bytealign_S (in0
[0], 0, 3);
1338 case 30: out1
[3] = amd_bytealign_S (in0
[0], 0, 2);
1347 case 31: out1
[3] = amd_bytealign_S (in0
[0], 0, 1);
1360 static void append_block1 (const u32 offset
, u32 dst0
[4], u32 dst1
[4], const u32 src_r0
)
1366 case 0: tmp
[0] = src_r0
;
1369 case 1: tmp
[0] = src_r0
<< 8;
1370 tmp
[1] = src_r0
>> 24;
1372 case 2: tmp
[0] = src_r0
<< 16;
1373 tmp
[1] = src_r0
>> 16;
1375 case 3: tmp
[0] = src_r0
<< 24;
1376 tmp
[1] = src_r0
>> 8;
1382 case 0: dst0
[0] |= tmp
[0];
1385 case 1: dst0
[1] |= tmp
[0];
1388 case 2: dst0
[2] |= tmp
[0];
1391 case 3: dst0
[3] |= tmp
[0];
1394 case 4: dst1
[0] |= tmp
[0];
1397 case 5: dst1
[1] |= tmp
[0];
1400 case 6: dst1
[2] |= tmp
[0];
1403 case 7: dst1
[3] |= tmp
[0];
1408 static void append_block8 (const u32 offset
, u32 dst0
[4], u32 dst1
[4], const u32 src_l0
[4], const u32 src_l1
[4], const u32 src_r0
[4], const u32 src_r1
[4])
1414 dst0
[0] = src_r0
[0];
1415 dst0
[1] = src_r0
[1];
1416 dst0
[2] = src_r0
[2];
1417 dst0
[3] = src_r0
[3];
1418 dst1
[0] = src_r1
[0];
1419 dst1
[1] = src_r1
[1];
1420 dst1
[2] = src_r1
[2];
1421 dst1
[3] = src_r1
[3];
1425 dst0
[0] = __byte_perm_S (src_l0
[0], src_r0
[0], 0x6540);
1426 dst0
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1427 dst0
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1428 dst0
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1429 dst1
[0] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1430 dst1
[1] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x6543);
1431 dst1
[2] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x6543);
1432 dst1
[3] = __byte_perm_S (src_r1
[2], src_r1
[3], 0x6543);
1436 dst0
[0] = __byte_perm_S (src_l0
[0], src_r0
[0], 0x5410);
1437 dst0
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1438 dst0
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1439 dst0
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1440 dst1
[0] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1441 dst1
[1] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x5432);
1442 dst1
[2] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x5432);
1443 dst1
[3] = __byte_perm_S (src_r1
[2], src_r1
[3], 0x5432);
1447 dst0
[0] = __byte_perm_S (src_l0
[0], src_r0
[0], 0x4210);
1448 dst0
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1449 dst0
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1450 dst0
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1451 dst1
[0] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1452 dst1
[1] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x4321);
1453 dst1
[2] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x4321);
1454 dst1
[3] = __byte_perm_S (src_r1
[2], src_r1
[3], 0x4321);
1458 dst0
[1] = src_r0
[0];
1459 dst0
[2] = src_r0
[1];
1460 dst0
[3] = src_r0
[2];
1461 dst1
[0] = src_r0
[3];
1462 dst1
[1] = src_r1
[0];
1463 dst1
[2] = src_r1
[1];
1464 dst1
[3] = src_r1
[2];
1468 dst0
[1] = __byte_perm_S (src_l0
[1], src_r0
[0], 0x6540);
1469 dst0
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1470 dst0
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1471 dst1
[0] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1472 dst1
[1] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1473 dst1
[2] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x6543);
1474 dst1
[3] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x6543);
1478 dst0
[1] = __byte_perm_S (src_l0
[1], src_r0
[0], 0x5410);
1479 dst0
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1480 dst0
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1481 dst1
[0] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1482 dst1
[1] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1483 dst1
[2] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x5432);
1484 dst1
[3] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x5432);
1488 dst0
[1] = __byte_perm_S (src_l0
[1], src_r0
[0], 0x4210);
1489 dst0
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1490 dst0
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1491 dst1
[0] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1492 dst1
[1] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1493 dst1
[2] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x4321);
1494 dst1
[3] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x4321);
1498 dst0
[2] = src_r0
[0];
1499 dst0
[3] = src_r0
[1];
1500 dst1
[0] = src_r0
[2];
1501 dst1
[1] = src_r0
[3];
1502 dst1
[2] = src_r1
[0];
1503 dst1
[3] = src_r1
[1];
1507 dst0
[2] = __byte_perm_S (src_l0
[2], src_r0
[0], 0x6540);
1508 dst0
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1509 dst1
[0] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1510 dst1
[1] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1511 dst1
[2] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1512 dst1
[3] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x6543);
1516 dst0
[2] = __byte_perm_S (src_l0
[2], src_r0
[0], 0x5410);
1517 dst0
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1518 dst1
[0] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1519 dst1
[1] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1520 dst1
[2] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1521 dst1
[3] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x5432);
1525 dst0
[2] = __byte_perm_S (src_l0
[2], src_r0
[0], 0x4210);
1526 dst0
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1527 dst1
[0] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1528 dst1
[1] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1529 dst1
[2] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1530 dst1
[3] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x4321);
1534 dst0
[3] = src_r0
[0];
1535 dst1
[0] = src_r0
[1];
1536 dst1
[1] = src_r0
[2];
1537 dst1
[2] = src_r0
[3];
1538 dst1
[3] = src_r1
[0];
1542 dst0
[3] = __byte_perm_S (src_l0
[3], src_r0
[0], 0x6540);
1543 dst1
[0] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1544 dst1
[1] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1545 dst1
[2] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1546 dst1
[3] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1550 dst0
[3] = __byte_perm_S (src_l0
[3], src_r0
[0], 0x5410);
1551 dst1
[0] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1552 dst1
[1] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1553 dst1
[2] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1554 dst1
[3] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1558 dst0
[3] = __byte_perm_S (src_l0
[3], src_r0
[0], 0x4210);
1559 dst1
[0] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1560 dst1
[1] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1561 dst1
[2] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1562 dst1
[3] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1566 dst1
[0] = src_r0
[0];
1567 dst1
[1] = src_r0
[1];
1568 dst1
[2] = src_r0
[2];
1569 dst1
[3] = src_r0
[3];
1573 dst1
[0] = __byte_perm_S (src_l1
[0], src_r0
[0], 0x6540);
1574 dst1
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1575 dst1
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1576 dst1
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1580 dst1
[0] = __byte_perm_S (src_l1
[0], src_r0
[0], 0x5410);
1581 dst1
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1582 dst1
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1583 dst1
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1587 dst1
[0] = __byte_perm_S (src_l1
[0], src_r0
[0], 0x4210);
1588 dst1
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1589 dst1
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1590 dst1
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1594 dst1
[1] = src_r0
[0];
1595 dst1
[2] = src_r0
[1];
1596 dst1
[3] = src_r0
[2];
1600 dst1
[1] = __byte_perm_S (src_l1
[1], src_r0
[0], 0x6540);
1601 dst1
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1602 dst1
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1606 dst1
[1] = __byte_perm_S (src_l1
[1], src_r0
[0], 0x5410);
1607 dst1
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1608 dst1
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1612 dst1
[1] = __byte_perm_S (src_l1
[1], src_r0
[0], 0x4210);
1613 dst1
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1614 dst1
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1618 dst1
[2] = src_r0
[0];
1619 dst1
[3] = src_r0
[1];
1623 dst1
[2] = __byte_perm_S (src_l1
[2], src_r0
[0], 0x6540);
1624 dst1
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1628 dst1
[2] = __byte_perm_S (src_l1
[2], src_r0
[0], 0x5410);
1629 dst1
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1633 dst1
[2] = __byte_perm_S (src_l1
[2], src_r0
[0], 0x4210);
1634 dst1
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1638 dst1
[3] = src_r0
[0];
1642 dst1
[3] = __byte_perm_S (src_l1
[3], src_r0
[0], 0x6540);
1646 dst1
[3] = __byte_perm_S (src_l1
[3], src_r0
[0], 0x5410);
1650 dst1
[3] = __byte_perm_S (src_l1
[3], src_r0
[0], 0x4210);
1655 #if defined IS_AMD || defined IS_GENERIC
1659 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
1662 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
1665 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
1668 dst1
[3] = src_r0
[0];
1671 dst1
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1672 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
1675 dst1
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1676 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
1679 dst1
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1680 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
1683 dst1
[3] = src_r0
[1];
1684 dst1
[2] = src_r0
[0];
1687 dst1
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1688 dst1
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1689 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
1692 dst1
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1693 dst1
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1694 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
1697 dst1
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1698 dst1
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1699 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
1702 dst1
[3] = src_r0
[2];
1703 dst1
[2] = src_r0
[1];
1704 dst1
[1] = src_r0
[0];
1707 dst1
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1708 dst1
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1709 dst1
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1710 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
1713 dst1
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1714 dst1
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1715 dst1
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1716 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
1719 dst1
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1720 dst1
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1721 dst1
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1722 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
1725 dst1
[3] = src_r0
[3];
1726 dst1
[2] = src_r0
[2];
1727 dst1
[1] = src_r0
[1];
1728 dst1
[0] = src_r0
[0];
1731 dst1
[3] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1732 dst1
[2] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1733 dst1
[1] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1734 dst1
[0] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1735 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
1738 dst1
[3] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1739 dst1
[2] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1740 dst1
[1] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1741 dst1
[0] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1742 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
1745 dst1
[3] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1746 dst1
[2] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1747 dst1
[1] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1748 dst1
[0] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1749 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
1752 dst1
[3] = src_r1
[0];
1753 dst1
[2] = src_r0
[3];
1754 dst1
[1] = src_r0
[2];
1755 dst1
[0] = src_r0
[1];
1756 dst0
[3] = src_r0
[0];
1759 dst1
[3] = amd_bytealign_S (src_r1
[1], src_r1
[0], 1);
1760 dst1
[2] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1761 dst1
[1] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1762 dst1
[0] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1763 dst0
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1764 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
1767 dst1
[3] = amd_bytealign_S (src_r1
[1], src_r1
[0], 2);
1768 dst1
[2] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1769 dst1
[1] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1770 dst1
[0] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1771 dst0
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1772 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
1775 dst1
[3] = amd_bytealign_S (src_r1
[1], src_r1
[0], 3);
1776 dst1
[2] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1777 dst1
[1] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1778 dst1
[0] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1779 dst0
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1780 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
1783 dst1
[3] = src_r1
[1];
1784 dst1
[2] = src_r1
[0];
1785 dst1
[1] = src_r0
[3];
1786 dst1
[0] = src_r0
[2];
1787 dst0
[3] = src_r0
[1];
1788 dst0
[2] = src_r0
[0];
1791 dst1
[3] = amd_bytealign_S (src_r1
[2], src_r1
[1], 1);
1792 dst1
[2] = amd_bytealign_S (src_r1
[1], src_r1
[0], 1);
1793 dst1
[1] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1794 dst1
[0] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1795 dst0
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1796 dst0
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1797 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
1800 dst1
[3] = amd_bytealign_S (src_r1
[2], src_r1
[1], 2);
1801 dst1
[2] = amd_bytealign_S (src_r1
[1], src_r1
[0], 2);
1802 dst1
[1] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1803 dst1
[0] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1804 dst0
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1805 dst0
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1806 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
1809 dst1
[3] = amd_bytealign_S (src_r1
[2], src_r1
[1], 3);
1810 dst1
[2] = amd_bytealign_S (src_r1
[1], src_r1
[0], 3);
1811 dst1
[1] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1812 dst1
[0] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1813 dst0
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1814 dst0
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1815 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
1818 dst1
[3] = src_r1
[2];
1819 dst1
[2] = src_r1
[1];
1820 dst1
[1] = src_r1
[0];
1821 dst1
[0] = src_r0
[3];
1822 dst0
[3] = src_r0
[2];
1823 dst0
[2] = src_r0
[1];
1824 dst0
[1] = src_r0
[0];
1827 dst1
[3] = amd_bytealign_S (src_r1
[3], src_r1
[2], 1);
1828 dst1
[2] = amd_bytealign_S (src_r1
[2], src_r1
[1], 1);
1829 dst1
[1] = amd_bytealign_S (src_r1
[1], src_r1
[0], 1);
1830 dst1
[0] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1831 dst0
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1832 dst0
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1833 dst0
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1834 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
1837 dst1
[3] = amd_bytealign_S (src_r1
[3], src_r1
[2], 2);
1838 dst1
[2] = amd_bytealign_S (src_r1
[2], src_r1
[1], 2);
1839 dst1
[1] = amd_bytealign_S (src_r1
[1], src_r1
[0], 2);
1840 dst1
[0] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1841 dst0
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1842 dst0
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1843 dst0
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1844 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
1847 dst1
[3] = amd_bytealign_S (src_r1
[3], src_r1
[2], 3);
1848 dst1
[2] = amd_bytealign_S (src_r1
[2], src_r1
[1], 3);
1849 dst1
[1] = amd_bytealign_S (src_r1
[1], src_r1
[0], 3);
1850 dst1
[0] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1851 dst0
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1852 dst0
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1853 dst0
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1854 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
1857 dst1
[3] = src_r1
[3];
1858 dst1
[2] = src_r1
[2];
1859 dst1
[1] = src_r1
[1];
1860 dst1
[0] = src_r1
[0];
1861 dst0
[3] = src_r0
[3];
1862 dst0
[2] = src_r0
[2];
1863 dst0
[1] = src_r0
[1];
1864 dst0
[0] = src_r0
[0];
/*
 * reverse_block: produce out0/out1 from in0/in1 for a string of `len` bytes.
 *
 * Visible steps:
 *   1. rshift_block_N is called on (in0, in1) -> (out0, out1) with a count
 *      of 32 - len.
 *   2. Every output word is then overwritten with swap32_S of the matching
 *      word of the temporaries tib40 / tib41.
 *
 * NOTE(review): the lines that declare and fill tib40/tib41 (between the two
 * steps) are not visible in this listing. Presumably they hold the shifted
 * words in reversed word order, so that the per-word byte swap completes a
 * full byte reversal -- confirm against the complete file.
 */
1870 static void reverse_block (u32 in0
[4], u32 in1
[4], u32 out0
[4], u32 out1
[4], const u32 len
)
1872 rshift_block_N (in0
, in1
, out0
, out1
, 32 - len
);
/* Byte-swap each temporary word into the caller-visible output words. */
1886 out0
[0] = swap32_S (tib40
[0]);
1887 out0
[1] = swap32_S (tib40
[1]);
1888 out0
[2] = swap32_S (tib40
[2]);
1889 out0
[3] = swap32_S (tib40
[3]);
1890 out1
[0] = swap32_S (tib41
[0]);
1891 out1
[1] = swap32_S (tib41
[1]);
1892 out1
[2] = swap32_S (tib41
[2]);
1893 out1
[3] = swap32_S (tib41
[3]);
/*
 * rule_op_mangle_lrest: OR every word of buf0/buf1 with generate_cmask() of
 * that same word.
 *
 * generate_cmask (defined near the top of this file) can only return bit 0x20
 * in a byte lane, and only for bytes in 0x41..0x5a or 0x61..0x7a. OR-ing the
 * mask in therefore sets the ASCII case bit on letters (lower-cases them) and
 * leaves every other byte unchanged.
 *
 * p0, p1 and in_len are not used by the statements visible here. The return
 * statement is not visible in this listing (presumably in_len -- confirm).
 */
1896 static u32
rule_op_mangle_lrest (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1898 buf0
[0] |= (generate_cmask (buf0
[0]));
1899 buf0
[1] |= (generate_cmask (buf0
[1]));
1900 buf0
[2] |= (generate_cmask (buf0
[2]));
1901 buf0
[3] |= (generate_cmask (buf0
[3]));
1902 buf1
[0] |= (generate_cmask (buf1
[0]));
1903 buf1
[1] |= (generate_cmask (buf1
[1]));
1904 buf1
[2] |= (generate_cmask (buf1
[2]));
1905 buf1
[3] |= (generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_urest: AND every word of buf0/buf1 with the complement of
 * generate_cmask() of that same word.
 *
 * generate_cmask (defined near the top of this file) yields bit 0x20 only in
 * byte lanes holding 0x41..0x5a or 0x61..0x7a, so clearing those bits
 * upper-cases ASCII letters and leaves every other byte unchanged.
 *
 * p0, p1 and in_len are not used by the statements visible here. The return
 * statement is not visible in this listing (presumably in_len -- confirm).
 */
1910 static u32
rule_op_mangle_urest (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1912 buf0
[0] &= ~(generate_cmask (buf0
[0]));
1913 buf0
[1] &= ~(generate_cmask (buf0
[1]));
1914 buf0
[2] &= ~(generate_cmask (buf0
[2]));
1915 buf0
[3] &= ~(generate_cmask (buf0
[3]));
1916 buf1
[0] &= ~(generate_cmask (buf1
[0]));
1917 buf1
[1] &= ~(generate_cmask (buf1
[1]));
1918 buf1
[2] &= ~(generate_cmask (buf1
[2]));
1919 buf1
[3] &= ~(generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_lrest_ufirst: lower-case everything, then upper-case the
 * first character.
 *
 * Delegates to rule_op_mangle_lrest for the whole buffer, then clears bit
 * 0x20 in the lowest-order byte of buf0[0], but only when generate_cmask
 * reports that byte as an ASCII letter. The lowest-order byte is character
 * position 0 under the packing used by rule_op_mangle_toggle_at.
 *
 * The return statement is not visible in this listing (presumably in_len --
 * confirm).
 */
1924 static u32
rule_op_mangle_lrest_ufirst (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1926 rule_op_mangle_lrest (p0
, p1
, buf0
, buf1
, in_len
);
/* 0x00000020 restricts the case-bit mask to the first byte only. */
1928 buf0
[0] &= ~(0x00000020 & generate_cmask (buf0
[0]));
/*
 * rule_op_mangle_urest_lfirst: upper-case everything, then lower-case the
 * first character.
 *
 * Delegates to rule_op_mangle_urest for the whole buffer, then sets bit 0x20
 * in the lowest-order byte of buf0[0], but only when generate_cmask reports
 * that byte as an ASCII letter. The lowest-order byte is character position 0
 * under the packing used by rule_op_mangle_toggle_at.
 *
 * The return statement is not visible in this listing (presumably in_len --
 * confirm).
 */
1933 static u32
rule_op_mangle_urest_lfirst (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1935 rule_op_mangle_urest (p0
, p1
, buf0
, buf1
, in_len
);
/* 0x00000020 restricts the case-bit mask to the first byte only. */
1937 buf0
[0] |= (0x00000020 & generate_cmask (buf0
[0]));
/*
 * rule_op_mangle_trest: XOR every word of buf0/buf1 with generate_cmask() of
 * that same word.
 *
 * generate_cmask (defined near the top of this file) yields bit 0x20 only in
 * byte lanes holding 0x41..0x5a or 0x61..0x7a, so the XOR flips the case of
 * every ASCII letter and leaves every other byte unchanged.
 *
 * p0, p1 and in_len are not used by the statements visible here. The return
 * statement is not visible in this listing (presumably in_len -- confirm).
 */
1942 static u32
rule_op_mangle_trest (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1944 buf0
[0] ^= (generate_cmask (buf0
[0]));
1945 buf0
[1] ^= (generate_cmask (buf0
[1]));
1946 buf0
[2] ^= (generate_cmask (buf0
[2]));
1947 buf0
[3] ^= (generate_cmask (buf0
[3]));
1948 buf1
[0] ^= (generate_cmask (buf1
[0]));
1949 buf1
[1] ^= (generate_cmask (buf1
[1]));
1950 buf1
[2] ^= (generate_cmask (buf1
[2]));
1951 buf1
[3] ^= (generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_toggle_at: flip the case of the character at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len. Otherwise builds a one-byte
 * case-bit mask (0x20 shifted into byte lane p0 & 3) and XORs it into one
 * word, gated by generate_cmask so that only ASCII letters are affected.
 *
 * NOTE(review): the switch header selecting the word is not visible in this
 * listing. Cases 0..7 map to buf0[0..3] then buf1[0..3], so the selector is
 * presumably p0 / 4 -- confirm. The final return statement is also not
 * visible.
 */
1956 static u32
rule_op_mangle_toggle_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1958 if (p0
>= in_len
) return (in_len
);
/* Case bit positioned on the byte lane of character p0 within its word. */
1960 const u32 tmp
= 0x20u
<< ((p0
& 3) * 8);
1964 case 0: buf0
[0] ^= (tmp
& generate_cmask (buf0
[0])); break;
1965 case 1: buf0
[1] ^= (tmp
& generate_cmask (buf0
[1])); break;
1966 case 2: buf0
[2] ^= (tmp
& generate_cmask (buf0
[2])); break;
1967 case 3: buf0
[3] ^= (tmp
& generate_cmask (buf0
[3])); break;
1968 case 4: buf1
[0] ^= (tmp
& generate_cmask (buf1
[0])); break;
1969 case 5: buf1
[1] ^= (tmp
& generate_cmask (buf1
[1])); break;
1970 case 6: buf1
[2] ^= (tmp
& generate_cmask (buf1
[2])); break;
1971 case 7: buf1
[3] ^= (tmp
& generate_cmask (buf1
[3])); break;
/*
 * rule_op_mangle_reverse: apply reverse_block to the buffer in place, passing
 * buf0/buf1 as both input and output with length in_len.
 *
 * p0 and p1 are not used by the statements visible here. The return statement
 * is not visible in this listing (presumably in_len -- confirm).
 */
1977 static u32
rule_op_mangle_reverse (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1979 reverse_block (buf0
, buf1
, buf0
, buf1
, in_len
);
/*
 * rule_op_mangle_dupeword: append the buffer to itself.
 *
 * Returns in_len unchanged when doubling the length would reach 32 bytes or
 * more. Otherwise calls append_block8 at offset out_len (= in_len), with
 * buf0/buf1 supplied as the destination and as both source operands.
 *
 * NOTE(review): the statements after the append_block8 call (length update
 * and return) are not visible in this listing.
 */
1984 static u32
rule_op_mangle_dupeword (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1986 if ((in_len
+ in_len
) >= 32) return (in_len
);
1988 u32 out_len
= in_len
;
1990 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, buf0
, buf1
);
/*
 * rule_op_mangle_dupeword_times: append p0 extra copies of the word.
 *
 * Returns in_len unchanged when (in_len * p0) + in_len would reach 32 bytes
 * or more. Otherwise loops p0 times, each iteration calling append_block8 at
 * offset out_len with buf0/buf1 as destination and left operand and
 * tib40/tib41 as right operand.
 *
 * NOTE(review): the declaration and initialisation of tib40/tib41, any
 * out_len update inside the loop, and the return statement are not visible
 * in this listing. Presumably tib40/tib41 hold a copy of the original word
 * and out_len grows by in_len per iteration -- confirm.
 */
1997 static u32
rule_op_mangle_dupeword_times (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1999 if (((in_len
* p0
) + in_len
) >= 32) return (in_len
);
2001 u32 out_len
= in_len
;
2015 for (u32 i
= 0; i
< p0
; i
++)
2017 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, tib40
, tib41
);
/*
 * rule_op_mangle_reflect: append the reversed word to the word.
 *
 * Returns in_len unchanged when doubling the length would reach 32 bytes or
 * more. Otherwise reverse_block writes its result for buf0/buf1 into
 * tib40/tib41, and append_block8 then appends tib40/tib41 to buf0/buf1 at
 * offset out_len (= in_len).
 *
 * NOTE(review): the declarations of tib40/tib41, the length update and the
 * return statement are not visible in this listing.
 */
2025 static u32
rule_op_mangle_reflect (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2027 if ((in_len
+ in_len
) >= 32) return (in_len
);
2029 u32 out_len
= in_len
;
2034 reverse_block (buf0
, buf1
, tib40
, tib41
, out_len
);
2036 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, tib40
, tib41
);
/*
 * rule_op_mangle_append: append the value p0 at the end of the word.
 *
 * Returns in_len unchanged when in_len + 1 would reach 32 bytes or more.
 * Otherwise calls append_block1 with offset out_len (= in_len), the buffer
 * and p0.
 *
 * NOTE(review): the length update and the return statement are not visible
 * in this listing (presumably out_len is incremented and returned --
 * confirm). p1 is not used by the visible statements.
 */
2043 static u32
rule_op_mangle_append (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2045 if ((in_len
+ 1) >= 32) return (in_len
);
2047 u32 out_len
= in_len
;
2049 append_block1 (out_len
, buf0
, buf1
, p0
);
/*
 * rule_op_mangle_prepend: insert the value p0 in front of the word.
 *
 * Returns in_len unchanged when in_len + 1 would reach 32 bytes or more.
 * Otherwise applies rshift_block to the buffer in place and ORs p0 into
 * buf0[0].
 *
 * NOTE(review): the OR only yields a clean first byte if rshift_block leaves
 * the lowest byte of buf0[0] zero and p0 fits in one byte -- both look
 * intended but are not established by the lines visible here. The length
 * update and return statement are also not visible. p1 is unused in the
 * visible statements.
 */
2056 static u32
rule_op_mangle_prepend (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2058 if ((in_len
+ 1) >= 32) return (in_len
);
2060 u32 out_len
= in_len
;
2062 rshift_block (buf0
, buf1
, buf0
, buf1
);
2064 buf0
[0] = buf0
[0] | p0
;
/*
 * rule_op_mangle_rotate_left: move the first character to the end.
 *
 * Returns in_len unchanged for an empty word. Otherwise:
 *   - saves the lowest byte of buf0[0], pre-shifted into the byte lane of the
 *     last character position (in_len - 1);
 *   - applies lshift_block to the buffer in place (a one-byte shift, per its
 *     definition earlier in this file);
 *   - ORs the saved byte into the word that holds position in_len - 1.
 *
 * p0 and p1 are not used by the statements visible here. The return statement
 * is not visible in this listing (presumably in_len -- confirm).
 */
2071 static u32
rule_op_mangle_rotate_left (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2073 if (in_len
== 0) return (in_len
);
2075 const u32 in_len1
= in_len
- 1;
/* Bit offset of the last character inside its 32-bit word. */
2077 const u32 sh
= (in_len1
& 3) * 8;
/* First character, already positioned for its new slot. */
2079 const u32 tmp
= (buf0
[0] & 0xff) << sh
;
2081 lshift_block (buf0
, buf1
, buf0
, buf1
);
/* Select the word that contains position in_len1. */
2083 switch (in_len1
/ 4)
2085 case 0: buf0
[0] |= tmp
; break;
2086 case 1: buf0
[1] |= tmp
; break;
2087 case 2: buf0
[2] |= tmp
; break;
2088 case 3: buf0
[3] |= tmp
; break;
2089 case 4: buf1
[0] |= tmp
; break;
2090 case 5: buf1
[1] |= tmp
; break;
2091 case 6: buf1
[2] |= tmp
; break;
2092 case 7: buf1
[3] |= tmp
; break;
/*
 * rule_op_mangle_rotate_right: move the last character to the front.
 *
 * Returns in_len unchanged for an empty word. Otherwise:
 *   - extracts the byte at position in_len - 1 into tmp;
 *   - applies rshift_block to the buffer in place;
 *   - calls truncate_right with in_len.
 *
 * NOTE(review): the declaration of tmp, the statement between rshift_block
 * and truncate_right (presumably the one that places tmp into the first
 * byte) and the return statement are not visible in this listing -- confirm
 * against the complete file. p0 and p1 are unused in the visible statements.
 */
2098 static u32
rule_op_mangle_rotate_right (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2100 if (in_len
== 0) return (in_len
);
2102 const u32 in_len1
= in_len
- 1;
/* Bit offset of the last character inside its 32-bit word. */
2104 const u32 sh
= (in_len1
& 3) * 8;
/* Read the last character from the word that contains position in_len1. */
2108 switch (in_len1
/ 4)
2110 case 0: tmp
= (buf0
[0] >> sh
) & 0xff; break;
2111 case 1: tmp
= (buf0
[1] >> sh
) & 0xff; break;
2112 case 2: tmp
= (buf0
[2] >> sh
) & 0xff; break;
2113 case 3: tmp
= (buf0
[3] >> sh
) & 0xff; break;
2114 case 4: tmp
= (buf1
[0] >> sh
) & 0xff; break;
2115 case 5: tmp
= (buf1
[1] >> sh
) & 0xff; break;
2116 case 6: tmp
= (buf1
[2] >> sh
) & 0xff; break;
2117 case 7: tmp
= (buf1
[3] >> sh
) & 0xff; break;
2120 rshift_block (buf0
, buf1
, buf0
, buf1
);
2124 truncate_right (buf0
, buf1
, in_len
);
/*
 * rule_op_mangle_delete_first: drop the first character.
 *
 * Returns in_len unchanged for an empty word. Otherwise computes
 * in_len1 = in_len - 1 and applies lshift_block to the buffer in place (a
 * one-byte shift, per its definition earlier in this file).
 *
 * NOTE(review): the return statement is not visible in this listing
 * (presumably in_len1 -- confirm). p0 and p1 are unused in the visible
 * statements.
 */
2129 static u32
rule_op_mangle_delete_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2131 if (in_len
== 0) return (in_len
);
2133 const u32 in_len1
= in_len
- 1;
2135 lshift_block (buf0
, buf1
, buf0
, buf1
);
/*
 * rule_op_mangle_delete_last: drop the last character.
 *
 * Returns in_len unchanged for an empty word. Otherwise builds a mask that
 * keeps only the byte lanes below position in_len - 1 within its word and
 * ANDs it into that word, zeroing the last character (and any higher lanes
 * of the same word).
 *
 * NOTE(review): the return statement is not visible in this listing
 * (presumably in_len1 -- confirm). p0 and p1 are unused in the visible
 * statements.
 */
2140 static u32
rule_op_mangle_delete_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2142 if (in_len
== 0) return (in_len
);
2144 const u32 in_len1
= in_len
- 1;
/* Keep-mask: ones for the bytes that precede the last character's lane. */
2146 const u32 tmp
= (1 << ((in_len1
& 3) * 8)) - 1;
/* Select the word that contains position in_len1. */
2148 switch (in_len1
/ 4)
2150 case 0: buf0
[0] &= tmp
; break;
2151 case 1: buf0
[1] &= tmp
; break;
2152 case 2: buf0
[2] &= tmp
; break;
2153 case 3: buf0
[3] &= tmp
; break;
2154 case 4: buf1
[0] &= tmp
; break;
2155 case 5: buf1
[1] &= tmp
; break;
2156 case 6: buf1
[2] &= tmp
; break;
2157 case 7: buf1
[3] &= tmp
; break;
/*
 * rule_op_mangle_delete_at: remove the character at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len. Otherwise lshift_block writes a
 * one-byte-shifted copy of the buffer into tib40/tib41, and ml is built as a
 * keep-mask for the byte lanes below p0 within its word. Each switch case
 * starts by keeping (word & ml) of the original word.
 *
 * NOTE(review): this listing shows only the first line of each case. The
 * switch header, the remainder of every case expression (presumably merging
 * in the shifted copy from tib40/tib41 and copying the following words), the
 * tib40/tib41 declarations, the out_len update and the return statement are
 * not visible -- confirm against the complete file.
 */
2163 static u32
rule_op_mangle_delete_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2165 if (p0
>= in_len
) return (in_len
);
2167 u32 out_len
= in_len
;
2172 lshift_block (buf0
, buf1
, tib40
, tib41
);
/* Keep-mask for the bytes that precede position p0 inside its word. */
2174 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
2179 case 0: buf0
[0] = (buf0
[0] & ml
)
2189 case 1: buf0
[1] = (buf0
[1] & ml
)
2198 case 2: buf0
[2] = (buf0
[2] & ml
)
2206 case 3: buf0
[3] = (buf0
[3] & ml
)
2213 case 4: buf1
[0] = (buf1
[0] & ml
)
2219 case 5: buf1
[1] = (buf1
[1] & ml
)
2224 case 6: buf1
[2] = (buf1
[2] & ml
)
2228 case 7: buf1
[3] = (buf1
[3] & ml
)
/*
 * rule_op_mangle_extract: keep a substring that starts at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len or when p0 + p1 > in_len.
 * Otherwise calls lshift_block_N on the buffer in place with a count of p0,
 * then truncate_right with out_len.
 *
 * NOTE(review): the declaration of out_len is not visible in this listing
 * (presumably initialised to p1, the requested substring length), nor is the
 * return statement -- confirm against the complete file.
 */
2238 static u32
rule_op_mangle_extract (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2240 if (p0
>= in_len
) return (in_len
);
2242 if ((p0
+ p1
) > in_len
) return (in_len
);
2246 lshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
2248 truncate_right (buf0
, buf1
, out_len
);
/*
 * rule_op_mangle_omit: remove p1 characters starting at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len or when p0 + p1 > in_len.
 * Otherwise lshift_block_N writes a copy of the buffer, processed with a
 * count of p1, into tib40/tib41, and ml is built as a keep-mask for the byte
 * lanes below p0 within its word. Each switch case starts by keeping
 * (word & ml) of the original word.
 *
 * NOTE(review): this listing shows only the first line of each case. The
 * switch header, the remainder of every case expression, the tib40/tib41
 * declarations, the out_len update and the return statement are not visible
 * -- confirm against the complete file.
 */
2253 static u32
rule_op_mangle_omit (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2255 if (p0
>= in_len
) return (in_len
);
2257 if ((p0
+ p1
) > in_len
) return (in_len
);
2259 u32 out_len
= in_len
;
2273 lshift_block_N (buf0
, buf1
, tib40
, tib41
, p1
);
/* Keep-mask for the bytes that precede position p0 inside its word. */
2275 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
2280 case 0: buf0
[0] = (buf0
[0] & ml
)
2290 case 1: buf0
[1] = (buf0
[1] & ml
)
2299 case 2: buf0
[2] = (buf0
[2] & ml
)
2307 case 3: buf0
[3] = (buf0
[3] & ml
)
2314 case 4: buf1
[0] = (buf1
[0] & ml
)
2320 case 5: buf1
[1] = (buf1
[1] & ml
)
2325 case 6: buf1
[2] = (buf1
[2] & ml
)
2329 case 7: buf1
[3] = (buf1
[3] & ml
)
/*
 * rule_op_mangle_insert: insert the value p1 at position p0.
 *
 * Returns in_len unchanged when p0 > in_len or when in_len + 1 would reach
 * 32 bytes or more. Otherwise rshift_block writes a shifted copy of the
 * buffer into tib40/tib41 and the word containing position p0 is rebuilt
 * from three parts:
 *   (original word & ml)   bytes before p0, unchanged
 *   p1n                    p1 moved into the byte lane of p0
 *   (shifted word  & mr)   bytes after p0, taken from the shifted copy
 *
 * NOTE(review): the switch header, the statements that follow the first line
 * of each case (presumably copying the remaining shifted words), the
 * tib40/tib41 declarations, the out_len update and the return statement are
 * not visible in this listing -- confirm against the complete file.
 */
2339 static u32
rule_op_mangle_insert (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2341 if (p0
> in_len
) return (in_len
);
2343 if ((in_len
+ 1) >= 32) return (in_len
);
2345 u32 out_len
= in_len
;
2350 rshift_block (buf0
, buf1
, tib40
, tib41
);
/* Inserted value positioned on the byte lane of p0. */
2352 const u32 p1n
= p1
<< ((p0
& 3) * 8);
/* Keep-mask for the bytes below the insertion lane. */
2354 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
/* Keep-mask for the bytes above the insertion lane. */
2356 const u32 mr
= 0xffffff00 << ((p0
& 3) * 8);
2360 case 0: buf0
[0] = (buf0
[0] & ml
) | p1n
| (tib40
[0] & mr
);
2369 case 1: buf0
[1] = (buf0
[1] & ml
) | p1n
| (tib40
[1] & mr
);
2377 case 2: buf0
[2] = (buf0
[2] & ml
) | p1n
| (tib40
[2] & mr
);
2384 case 3: buf0
[3] = (buf0
[3] & ml
) | p1n
| (tib40
[3] & mr
);
2390 case 4: buf1
[0] = (buf1
[0] & ml
) | p1n
| (tib41
[0] & mr
);
2395 case 5: buf1
[1] = (buf1
[1] & ml
) | p1n
| (tib41
[1] & mr
);
2399 case 6: buf1
[2] = (buf1
[2] & ml
) | p1n
| (tib41
[2] & mr
);
2402 case 7: buf1
[3] = (buf1
[3] & ml
) | p1n
| (tib41
[3] & mr
);
/*
 * rule_op_mangle_overstrike: overwrite the character at position p0 with p1.
 *
 * Returns in_len unchanged when p0 >= in_len. Otherwise clears the byte lane
 * of p0 in its word with mask m and ORs in p1 shifted to that lane.
 *
 * NOTE(review): the switch header is not visible in this listing. Cases 0..7
 * map to buf0[0..3] then buf1[0..3], so the selector is presumably p0 / 4 --
 * confirm. The final return statement is also not visible. p1 is shifted
 * without masking, so it is assumed to fit in one byte.
 */
2411 static u32
rule_op_mangle_overstrike (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2413 if (p0
>= in_len
) return (in_len
);
/* Replacement value positioned on the byte lane of p0. */
2415 const u32 p1n
= p1
<< ((p0
& 3) * 8);
/* All ones except the byte lane being overwritten. */
2417 const u32 m
= ~(0xffu
<< ((p0
& 3) * 8));
2421 case 0: buf0
[0] = (buf0
[0] & m
) | p1n
; break;
2422 case 1: buf0
[1] = (buf0
[1] & m
) | p1n
; break;
2423 case 2: buf0
[2] = (buf0
[2] & m
) | p1n
; break;
2424 case 3: buf0
[3] = (buf0
[3] & m
) | p1n
; break;
2425 case 4: buf1
[0] = (buf1
[0] & m
) | p1n
; break;
2426 case 5: buf1
[1] = (buf1
[1] & m
) | p1n
; break;
2427 case 6: buf1
[2] = (buf1
[2] & m
) | p1n
; break;
2428 case 7: buf1
[3] = (buf1
[3] & m
) | p1n
; break;
/*
 * rule_op_mangle_truncate_at: cut the word at position p0.
 *
 * Returns in_len unchanged when p0 >= in_len. Otherwise calls truncate_right
 * on the buffer with p0.
 *
 * NOTE(review): the return statement is not visible in this listing
 * (presumably p0, the new length -- confirm). p1 is unused in the visible
 * statements.
 */
2434 static u32
rule_op_mangle_truncate_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2436 if (p0
>= in_len
) return (in_len
);
2438 truncate_right (buf0
, buf1
, p0
);
/*
 * rule_op_mangle_replace: replace every byte equal to p0 with p1.
 *
 * Two implementations are visible:
 *
 *   1. A loop over i in [0, in_len) with one case per byte position 0..31.
 *      Each case isolates one byte of a word with __byte_perm_S, compares it
 *      with p0, and on a match rebuilds the word with __byte_perm_S so that
 *      the byte comes from p1.
 *      NOTE(review): the preprocessor guard around this path, the switch
 *      header and the per-case break statements are not visible in this
 *      listing. The selector constants are read here assuming
 *      __byte_perm_S follows CUDA __byte_perm semantics -- confirm.
 *
 *   2. Under IS_AMD || IS_GENERIC: p0 and p1 are converted to uchar4, and
 *      each word is reinterpreted as uchar4, passed through
 *      select (tmp, tmp1, tmp == tmp0), and stored back with as_uint.
 *      This path processes all eight words regardless of in_len.
 *
 * NOTE(review): the declaration of tmp for path 2, the closing #endif and the
 * return statement are not visible in this listing (presumably in_len is
 * returned, since the length does not change -- confirm).
 */
2443 static u32
rule_op_mangle_replace (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2446 for (u32 i
= 0; i
< in_len
; i
++)
/* Byte positions 0..15 live in buf0[0..3], four per word. */
2450 case 0: if ((__byte_perm_S (buf0
[0], 0, 0x6540)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x7650);
2452 case 1: if ((__byte_perm_S (buf0
[0], 0, 0x6541)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x7604);
2454 case 2: if ((__byte_perm_S (buf0
[0], 0, 0x6542)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x7054);
2456 case 3: if ((__byte_perm_S (buf0
[0], 0, 0x6543)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x0654);
2458 case 4: if ((__byte_perm_S (buf0
[1], 0, 0x6540)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x7650);
2460 case 5: if ((__byte_perm_S (buf0
[1], 0, 0x6541)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x7604);
2462 case 6: if ((__byte_perm_S (buf0
[1], 0, 0x6542)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x7054);
2464 case 7: if ((__byte_perm_S (buf0
[1], 0, 0x6543)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x0654);
2466 case 8: if ((__byte_perm_S (buf0
[2], 0, 0x6540)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x7650);
2468 case 9: if ((__byte_perm_S (buf0
[2], 0, 0x6541)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x7604);
2470 case 10: if ((__byte_perm_S (buf0
[2], 0, 0x6542)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x7054);
2472 case 11: if ((__byte_perm_S (buf0
[2], 0, 0x6543)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x0654);
2474 case 12: if ((__byte_perm_S (buf0
[3], 0, 0x6540)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x7650);
2476 case 13: if ((__byte_perm_S (buf0
[3], 0, 0x6541)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x7604);
2478 case 14: if ((__byte_perm_S (buf0
[3], 0, 0x6542)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x7054);
2480 case 15: if ((__byte_perm_S (buf0
[3], 0, 0x6543)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x0654);
/* Byte positions 16..31 live in buf1[0..3], four per word. */
2482 case 16: if ((__byte_perm_S (buf1
[0], 0, 0x6540)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x7650);
2484 case 17: if ((__byte_perm_S (buf1
[0], 0, 0x6541)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x7604);
2486 case 18: if ((__byte_perm_S (buf1
[0], 0, 0x6542)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x7054);
2488 case 19: if ((__byte_perm_S (buf1
[0], 0, 0x6543)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x0654);
2490 case 20: if ((__byte_perm_S (buf1
[1], 0, 0x6540)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x7650);
2492 case 21: if ((__byte_perm_S (buf1
[1], 0, 0x6541)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x7604);
2494 case 22: if ((__byte_perm_S (buf1
[1], 0, 0x6542)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x7054);
2496 case 23: if ((__byte_perm_S (buf1
[1], 0, 0x6543)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x0654);
2498 case 24: if ((__byte_perm_S (buf1
[2], 0, 0x6540)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x7650);
2500 case 25: if ((__byte_perm_S (buf1
[2], 0, 0x6541)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x7604);
2502 case 26: if ((__byte_perm_S (buf1
[2], 0, 0x6542)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x7054);
2504 case 27: if ((__byte_perm_S (buf1
[2], 0, 0x6543)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x0654);
2506 case 28: if ((__byte_perm_S (buf1
[3], 0, 0x6540)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x7650);
2508 case 29: if ((__byte_perm_S (buf1
[3], 0, 0x6541)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x7604);
2510 case 30: if ((__byte_perm_S (buf1
[3], 0, 0x6542)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x7054);
2512 case 31: if ((__byte_perm_S (buf1
[3], 0, 0x6543)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x0654);
2518 #if defined IS_AMD || defined IS_GENERIC
/* Vector path: search and replacement values converted to uchar4. */
2519 const uchar4 tmp0
= (uchar4
) (p0
);
2520 const uchar4 tmp1
= (uchar4
) (p1
);
/* Per word: view as four bytes, pick tmp1 where the byte equals tmp0. */
2524 tmp
= as_uchar4 (buf0
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[0] = as_uint (tmp
);
2525 tmp
= as_uchar4 (buf0
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[1] = as_uint (tmp
);
2526 tmp
= as_uchar4 (buf0
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[2] = as_uint (tmp
);
2527 tmp
= as_uchar4 (buf0
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[3] = as_uint (tmp
);
2528 tmp
= as_uchar4 (buf1
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[0] = as_uint (tmp
);
2529 tmp
= as_uchar4 (buf1
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[1] = as_uint (tmp
);
2530 tmp
= as_uchar4 (buf1
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[2] = as_uint (tmp
);
2531 tmp
= as_uchar4 (buf1
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[3] = as_uint (tmp
);
/*
 * rule_op_mangle_purgechar: rule handler with the same signature as the other
 * rule_op_mangle_* functions in this file.
 *
 * NOTE(review): only the signature is visible in this listing; no body
 * statement that references an identifier appears. What the function does
 * and what it returns cannot be determined from here -- check the complete
 * file before relying on it.
 */
2537 static u32
rule_op_mangle_purgechar (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
/*
 * rule_op_mangle_togglecase_rec: rule handler with the same signature as the
 * other rule_op_mangle_* functions in this file.
 *
 * NOTE(review): only the signature is visible in this listing; no body
 * statement that references an identifier appears. What the function does
 * and what it returns cannot be determined from here -- check the complete
 * file before relying on it.
 */
2543 static u32
rule_op_mangle_togglecase_rec (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2549 static u32
rule_op_mangle_dupechar_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2551 if ( in_len
== 0) return (in_len
);
2552 if ((in_len
+ p0
) >= 32) return (in_len
);
2554 u32 out_len
= in_len
;
2556 const u32 tmp
= buf0
[0] & 0xFF;
2558 rshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
2563 case 1: buf0
[0] |= tmp
;
2565 case 2: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x5400);
2567 case 3: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x4000);
2569 case 4: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2571 case 5: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2574 case 6: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2575 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x5400);
2577 case 7: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2578 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x4000);
2580 case 8: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2581 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2583 case 9: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2584 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2587 case 10: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2588 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2589 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x5400);
2591 case 11: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2592 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2593 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x4000);
2595 case 12: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2596 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2597 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2599 case 13: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2600 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2601 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2604 case 14: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2605 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2606 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2607 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x5400);
2609 case 15: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2610 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2611 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2612 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x4000);
2614 case 16: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2615 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2616 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2617 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2619 case 17: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2620 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2621 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2622 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2625 case 18: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2626 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2627 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2628 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2629 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x5400);
2631 case 19: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2632 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2633 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2634 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2635 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x4000);
2637 case 20: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2638 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2639 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2640 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2641 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2643 case 21: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2644 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2645 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2646 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2647 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2650 case 22: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2651 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2652 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2653 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2654 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2655 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x5400);
2657 case 23: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2658 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2659 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2660 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2661 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2662 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x4000);
2664 case 24: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2665 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2666 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2667 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2668 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2669 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2671 case 25: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2672 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2673 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2674 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2675 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2676 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2679 case 26: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2680 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2681 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2682 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2683 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2684 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2685 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x5400);
2687 case 27: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2688 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2689 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2690 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2691 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2692 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2693 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x4000);
2695 case 28: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2696 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2697 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2698 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2699 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2700 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2701 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2703 case 29: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2704 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2705 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2706 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2707 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2708 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2709 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2712 case 30: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2713 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2714 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2715 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2716 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2717 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2718 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2719 buf1
[3] |= __byte_perm_S (tmp
, 0, 0x5400);
2721 case 31: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2722 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2723 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2724 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2725 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2726 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2727 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2728 buf1
[3] |= __byte_perm_S (tmp
, 0, 0x4000);
2733 #if defined IS_AMD || defined IS_GENERIC
2736 case 1: buf0
[0] |= tmp
<< 0;
2738 case 2: buf0
[0] |= tmp
<< 0 | tmp
<< 8;
2740 case 3: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2742 case 4: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2744 case 5: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2745 buf0
[1] |= tmp
<< 0;
2747 case 6: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2748 buf0
[1] |= tmp
<< 0 | tmp
<< 8;
2750 case 7: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2751 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2753 case 8: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2754 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2756 case 9: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2757 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2758 buf0
[2] |= tmp
<< 0;
2760 case 10: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2761 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2762 buf0
[2] |= tmp
<< 0 | tmp
<< 8;
2764 case 11: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2765 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2766 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2768 case 12: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2769 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2770 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2772 case 13: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2773 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2774 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2775 buf0
[3] |= tmp
<< 0;
2777 case 14: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2778 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2779 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2780 buf0
[3] |= tmp
<< 0 | tmp
<< 8;
2782 case 15: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2783 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2784 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2785 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2787 case 16: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2788 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2789 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2790 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2792 case 17: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2793 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2794 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2795 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2796 buf1
[0] |= tmp
<< 0;
2798 case 18: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2799 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2800 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2801 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2802 buf1
[0] |= tmp
<< 0 | tmp
<< 8;
2804 case 19: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2805 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2806 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2807 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2808 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2810 case 20: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2811 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2812 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2813 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2814 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2816 case 21: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2817 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2818 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2819 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2820 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2821 buf1
[1] |= tmp
<< 0;
2823 case 22: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2824 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2825 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2826 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2827 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2828 buf1
[1] |= tmp
<< 0 | tmp
<< 8;
2830 case 23: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2831 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2832 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2833 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2834 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2835 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2837 case 24: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2838 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2839 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2840 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2841 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2842 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2844 case 25: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2845 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2846 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2847 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2848 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2849 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2850 buf1
[2] |= tmp
<< 0;
2852 case 26: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2853 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2854 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2855 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2856 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2857 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2858 buf1
[2] |= tmp
<< 0 | tmp
<< 8;
2860 case 27: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2861 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2862 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2863 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2864 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2865 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2866 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2868 case 28: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2869 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2870 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2871 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2872 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2873 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2874 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2876 case 29: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2877 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2878 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2879 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2880 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2881 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2882 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2883 buf1
[3] |= tmp
<< 0;
2885 case 30: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2886 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2887 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2888 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2889 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2890 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2891 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2892 buf1
[3] |= tmp
<< 0 | tmp
<< 8;
2894 case 31: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2895 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2896 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2897 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2898 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2899 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2900 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2901 buf1
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
/**
 * rule_op_mangle_dupechar_last
 *
 * Reads the last character of the word and hands it to append_block1 ()
 * once per loop iteration, p0 iterations in total (presumably appending
 * p0 copies of that character - append_block1 is defined elsewhere).
 *
 * p0     : loop bound, i.e. how many times append_block1 () is invoked
 * p1     : not referenced in the visible lines
 * buf0   : characters 0..15 of the word, four per u32, character i sits
 *          in bits ((i & 3) * 8) .. ((i & 3) * 8 + 7) of word (i / 4)
 * buf1   : characters 16..31, same layout
 * in_len : current word length in bytes
 *
 * Early exits: returns in_len unchanged when the word is empty or when
 * in_len + p0 would reach 32.
 *
 * NOTE(review): this listing appears to omit some lines (braces, the
 * declaration of tmp, whatever updates out_len inside the loop and the
 * final return) - confirm against the complete source.
 */
2911 static u32
rule_op_mangle_dupechar_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2913 if ( in_len
== 0) return (in_len
);
2914 if ((in_len
+ p0
) >= 32) return (in_len
);
// index of the last character
2916 const u32 in_len1
= in_len
- 1;
// bit offset of that character inside its 32 bit word
2918 const u32 sh
= (in_len1
& 3) * 8;
// select the word holding the last character (0..3 -> buf0, 4..7 -> buf1)
// and isolate the single byte
2922 switch (in_len1
/ 4)
2924 case 0: tmp
= (buf0
[0] >> sh
) & 0xff; break;
2925 case 1: tmp
= (buf0
[1] >> sh
) & 0xff; break;
2926 case 2: tmp
= (buf0
[2] >> sh
) & 0xff; break;
2927 case 3: tmp
= (buf0
[3] >> sh
) & 0xff; break;
2928 case 4: tmp
= (buf1
[0] >> sh
) & 0xff; break;
2929 case 5: tmp
= (buf1
[1] >> sh
) & 0xff; break;
2930 case 6: tmp
= (buf1
[2] >> sh
) & 0xff; break;
2931 case 7: tmp
= (buf1
[3] >> sh
) & 0xff; break;
2934 u32 out_len
= in_len
;
// one append_block1 () call per requested copy
2936 for (u32 i
= 0; i
< p0
; i
++)
2938 append_block1 (out_len
, buf0
, buf1
, tmp
);
/**
 * rule_op_mangle_dupechar_all
 *
 * Doubles every character of the word in place ("abc" -> "aabbcc").
 * Only buf0 [0..3] is read as input; the widened result is spread over
 * buf0 [0..3] and buf1 [0..3], and the length is doubled.
 *
 * p0, p1 : not referenced in the visible lines
 * buf0   : in: the word (four characters per u32, lowest byte first)
 *          out: first half of the doubled word
 * buf1   : out: second half of the doubled word
 * in_len : current word length in bytes
 *
 * Early exits: returns in_len unchanged when the word is empty or when
 * twice its length would reach 32.
 *
 * NOTE(review): this listing appears to omit some lines (braces, the
 * declarations of tib40 / tib41, preprocessor guards other than the one
 * shown, the final return, and possibly the buffer write-back belonging
 * to the __byte_perm_S variant) - confirm against the complete source.
 */
2946 static u32
rule_op_mangle_dupechar_all (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2948 if ( in_len
== 0) return (in_len
);
2949 if ((in_len
+ in_len
) >= 32) return (in_len
);
2951 u32 out_len
= in_len
;
// __byte_perm_S variant: each call turns one half (two characters) of an
// input word into one temporary word.
// Assuming __byte_perm_S follows the CUDA __byte_perm selector semantics,
// 0x1100 yields bytes 0,0,1,1 and 0x3322 yields bytes 2,2,3,3 - TODO confirm
2957 tib40
[0] = __byte_perm_S (buf0
[0], 0, 0x1100);
2958 tib40
[1] = __byte_perm_S (buf0
[0], 0, 0x3322);
2959 tib40
[2] = __byte_perm_S (buf0
[1], 0, 0x1100);
2960 tib40
[3] = __byte_perm_S (buf0
[1], 0, 0x3322);
2961 tib41
[0] = __byte_perm_S (buf0
[2], 0, 0x1100);
2962 tib41
[1] = __byte_perm_S (buf0
[2], 0, 0x3322);
2963 tib41
[2] = __byte_perm_S (buf0
[3], 0, 0x1100);
2964 tib41
[3] = __byte_perm_S (buf0
[3], 0, 0x3322);
// shift-and-mask variant, step 1: spread two characters of an input word
// into bits 0..7 and bits 16..23 of a temporary, leaving the odd bytes zero
2976 #if defined IS_AMD || defined IS_GENERIC
2977 tib40
[0] = ((buf0
[0] & 0x000000FF) << 0) | ((buf0
[0] & 0x0000FF00) << 8);
2978 tib40
[1] = ((buf0
[0] & 0x00FF0000) >> 16) | ((buf0
[0] & 0xFF000000) >> 8);
2979 tib40
[2] = ((buf0
[1] & 0x000000FF) << 0) | ((buf0
[1] & 0x0000FF00) << 8);
2980 tib40
[3] = ((buf0
[1] & 0x00FF0000) >> 16) | ((buf0
[1] & 0xFF000000) >> 8);
2981 tib41
[0] = ((buf0
[2] & 0x000000FF) << 0) | ((buf0
[2] & 0x0000FF00) << 8);
2982 tib41
[1] = ((buf0
[2] & 0x00FF0000) >> 16) | ((buf0
[2] & 0xFF000000) >> 8);
2983 tib41
[2] = ((buf0
[3] & 0x000000FF) << 0) | ((buf0
[3] & 0x0000FF00) << 8);
2984 tib41
[3] = ((buf0
[3] & 0x00FF0000) >> 16) | ((buf0
[3] & 0xFF000000) >> 8);
// step 2: x | (x << 8) copies each spread character into the empty byte
// above it, so every character now appears twice in a row
2986 buf0
[0] = tib40
[0] | (tib40
[0] << 8);
2987 buf0
[1] = tib40
[1] | (tib40
[1] << 8);
2988 buf0
[2] = tib40
[2] | (tib40
[2] << 8);
2989 buf0
[3] = tib40
[3] | (tib40
[3] << 8);
2990 buf1
[0] = tib41
[0] | (tib41
[0] << 8);
2991 buf1
[1] = tib41
[1] | (tib41
[1] << 8);
2992 buf1
[2] = tib41
[2] | (tib41
[2] << 8);
2993 buf1
[3] = tib41
[3] | (tib41
[3] << 8);
// the word is now twice as long
2996 out_len
= out_len
+ out_len
;
/**
 * rule_op_mangle_switch_first
 *
 * Exchanges the first two characters of the word, i.e. the two lowest
 * bytes of buf0 [0]; the upper two bytes of buf0 [0] are kept as they are.
 *
 * p0, p1 : not referenced in the visible lines
 * buf0   : word data, only buf0 [0] is touched
 * buf1   : not referenced in the visible lines
 * in_len : current word length in bytes
 *
 * Early exit: returns in_len unchanged when the word has fewer than two
 * characters.
 *
 * NOTE(review): this listing appears to omit some lines (braces, the
 * guard around the __byte_perm_S variant, the final return) - confirm
 * against the complete source.
 */
3001 static u32
rule_op_mangle_switch_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3003 if (in_len
< 2) return (in_len
);
// __byte_perm_S variant; assuming CUDA __byte_perm selector semantics,
// 0x3201 yields bytes 1,0,2,3 of buf0 [0] - TODO confirm
3006 buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x3201);
// shift-and-mask variant: keep bits 16..31, move byte 0 up into byte 1
// and byte 1 down into byte 0
3009 #if defined IS_AMD || defined IS_GENERIC
3010 buf0
[0] = (buf0
[0] & 0xFFFF0000) | ((buf0
[0] << 8) & 0x0000FF00) | ((buf0
[0] >> 8) & 0x000000FF);
/**
 * rule_op_mangle_switch_last
 *
 * Exchanges the last two characters of the word. There is one case per
 * word length from 2 to 31, in two variants (__byte_perm_S based and
 * shift-and-mask based).
 *
 * p0, p1 : not referenced in the visible lines
 * buf0   : characters 0..15 of the word, four per u32, lowest byte first
 * buf1   : characters 16..31, same layout
 * in_len : current word length in bytes
 *
 * Shape of the cases, as read from the shift-and-mask variant:
 *  - length % 4 == 2, 3 or 0: both characters sit in the same u32 and are
 *    exchanged inside it. For length % 4 == 2 and 3 the bytes above the
 *    exchanged pair are written as zero.
 *  - length % 4 == 1: the pair straddles two u32 and is exchanged in three
 *    steps, using the upper bytes of the second word as scratch space.
 *
 * Early exit: returns in_len unchanged when the word has fewer than two
 * characters.
 *
 * NOTE(review): this listing appears to omit some lines (braces, the
 * switch head - the case labels presumably select on in_len -, break
 * statements, the guard around the __byte_perm_S variant and the final
 * return) - confirm against the complete source.
 */
3016 static u32
rule_op_mangle_switch_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3018 if (in_len
< 2) return (in_len
);
// __byte_perm_S variant. Assuming CUDA __byte_perm selector semantics
// (nibbles 0..3 pick a byte of the first operand, 4..7 of the second),
// 0x5401 / 0x4120 / 0x2310 exchange the pair inside one word - TODO confirm
3023 case 2: buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x5401);
3025 case 3: buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x4120);
3027 case 4: buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x2310);
// pair straddles buf0 [0] / buf0 [1]: three-step exchange
3029 case 5: buf0
[1] = __byte_perm_S (buf0
[1], buf0
[0], 0x7210);
3030 buf0
[0] = __byte_perm_S (buf0
[0], buf0
[1], 0x4210);
3031 buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x6543);
3033 case 6: buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x5401);
3035 case 7: buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x4120);
3037 case 8: buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x2310);
3039 case 9: buf0
[2] = __byte_perm_S (buf0
[2], buf0
[1], 0x7210);
3040 buf0
[1] = __byte_perm_S (buf0
[1], buf0
[2], 0x4210);
3041 buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x6543);
3043 case 10: buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x5401);
3045 case 11: buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x4120);
3047 case 12: buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x2310);
3049 case 13: buf0
[3] = __byte_perm_S (buf0
[3], buf0
[2], 0x7210);
3050 buf0
[2] = __byte_perm_S (buf0
[2], buf0
[3], 0x4210);
3051 buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x6543);
3053 case 14: buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x5401);
3055 case 15: buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x4120);
3057 case 16: buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x2310);
3059 case 17: buf1
[0] = __byte_perm_S (buf1
[0], buf0
[3], 0x7210);
3060 buf0
[3] = __byte_perm_S (buf0
[3], buf1
[0], 0x4210);
3061 buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x6543);
3063 case 18: buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x5401);
3065 case 19: buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x4120);
3067 case 20: buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x2310);
3069 case 21: buf1
[1] = __byte_perm_S (buf1
[1], buf1
[0], 0x7210);
3070 buf1
[0] = __byte_perm_S (buf1
[0], buf1
[1], 0x4210);
3071 buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x6543);
3073 case 22: buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x5401);
3075 case 23: buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x4120);
3077 case 24: buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x2310);
3079 case 25: buf1
[2] = __byte_perm_S (buf1
[2], buf1
[1], 0x7210);
3080 buf1
[1] = __byte_perm_S (buf1
[1], buf1
[2], 0x4210);
3081 buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x6543);
3083 case 26: buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x5401);
3085 case 27: buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x4120);
3087 case 28: buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x2310);
3089 case 29: buf1
[3] = __byte_perm_S (buf1
[3], buf1
[2], 0x7210);
3090 buf1
[2] = __byte_perm_S (buf1
[2], buf1
[3], 0x4210);
3091 buf1
[3] = __byte_perm_S (buf1
[3], 0, 0x6543);
3093 case 30: buf1
[3] = __byte_perm_S (buf1
[3], 0, 0x5401);
3095 case 31: buf1
[3] = __byte_perm_S (buf1
[3], 0, 0x4120);
// shift-and-mask variant of the same exchanges
3100 #if defined IS_AMD || defined IS_GENERIC
// pair is bytes 0/1 of the word; bits 16..31 of the result are zero
3103 case 2: buf0
[0] = ((buf0
[0] << 8) & 0x0000FF00) | ((buf0
[0] >> 8) & 0x000000FF);
// pair is bytes 1/2; byte 0 is kept, byte 3 of the result is zero
3105 case 3: buf0
[0] = (buf0
[0] & 0x000000FF) | ((buf0
[0] << 8) & 0x00FF0000) | ((buf0
[0] >> 8) & 0x0000FF00);
// pair is bytes 2/3; bytes 0/1 are kept
3107 case 4: buf0
[0] = (buf0
[0] & 0x0000FFFF) | ((buf0
[0] << 8) & 0xFF000000) | ((buf0
[0] >> 8) & 0x00FF0000);
// pair is byte 3 of the first word and byte 0 of the next one:
// 1) park the first word's byte 3 in byte 3 of the next word
// 2) move the next word's byte 0 into byte 3 of the first word
// 3) shift the parked byte down into byte 0 of the next word
3109 case 5: buf0
[1] = (buf0
[0] & 0xFF000000) | buf0
[1];
3110 buf0
[0] = (buf0
[0] & 0x00FFFFFF) | (buf0
[1] << 24);
3111 buf0
[1] = (buf0
[1] >> 24);
3113 case 6: buf0
[1] = ((buf0
[1] << 8) & 0x0000FF00) | ((buf0
[1] >> 8) & 0x000000FF);
3115 case 7: buf0
[1] = (buf0
[1] & 0x000000FF) | ((buf0
[1] << 8) & 0x00FF0000) | ((buf0
[1] >> 8) & 0x0000FF00);
3117 case 8: buf0
[1] = (buf0
[1] & 0x0000FFFF) | ((buf0
[1] << 8) & 0xFF000000) | ((buf0
[1] >> 8) & 0x00FF0000);
3119 case 9: buf0
[2] = (buf0
[1] & 0xFF000000) | buf0
[2];
3120 buf0
[1] = (buf0
[1] & 0x00FFFFFF) | (buf0
[2] << 24);
3121 buf0
[2] = (buf0
[2] >> 24);
3123 case 10: buf0
[2] = ((buf0
[2] << 8) & 0x0000FF00) | ((buf0
[2] >> 8) & 0x000000FF);
3125 case 11: buf0
[2] = (buf0
[2] & 0x000000FF) | ((buf0
[2] << 8) & 0x00FF0000) | ((buf0
[2] >> 8) & 0x0000FF00);
3127 case 12: buf0
[2] = (buf0
[2] & 0x0000FFFF) | ((buf0
[2] << 8) & 0xFF000000) | ((buf0
[2] >> 8) & 0x00FF0000);
3129 case 13: buf0
[3] = (buf0
[2] & 0xFF000000) | buf0
[3];
3130 buf0
[2] = (buf0
[2] & 0x00FFFFFF) | (buf0
[3] << 24);
3131 buf0
[3] = (buf0
[3] >> 24);
3133 case 14: buf0
[3] = ((buf0
[3] << 8) & 0x0000FF00) | ((buf0
[3] >> 8) & 0x000000FF);
3135 case 15: buf0
[3] = (buf0
[3] & 0x000000FF) | ((buf0
[3] << 8) & 0x00FF0000) | ((buf0
[3] >> 8) & 0x0000FF00);
3137 case 16: buf0
[3] = (buf0
[3] & 0x0000FFFF) | ((buf0
[3] << 8) & 0xFF000000) | ((buf0
[3] >> 8) & 0x00FF0000);
3139 case 17: buf1
[0] = (buf0
[3] & 0xFF000000) | buf1
[0];
3140 buf0
[3] = (buf0
[3] & 0x00FFFFFF) | (buf1
[0] << 24);
3141 buf1
[0] = (buf1
[0] >> 24);
3143 case 18: buf1
[0] = ((buf1
[0] << 8) & 0x0000FF00) | ((buf1
[0] >> 8) & 0x000000FF);
3145 case 19: buf1
[0] = (buf1
[0] & 0x000000FF) | ((buf1
[0] << 8) & 0x00FF0000) | ((buf1
[0] >> 8) & 0x0000FF00);
3147 case 20: buf1
[0] = (buf1
[0] & 0x0000FFFF) | ((buf1
[0] << 8) & 0xFF000000) | ((buf1
[0] >> 8) & 0x00FF0000);
3149 case 21: buf1
[1] = (buf1
[0] & 0xFF000000) | buf1
[1];
3150 buf1
[0] = (buf1
[0] & 0x00FFFFFF) | (buf1
[1] << 24);
3151 buf1
[1] = (buf1
[1] >> 24);
3153 case 22: buf1
[1] = ((buf1
[1] << 8) & 0x0000FF00) | ((buf1
[1] >> 8) & 0x000000FF);
3155 case 23: buf1
[1] = (buf1
[1] & 0x000000FF) | ((buf1
[1] << 8) & 0x00FF0000) | ((buf1
[1] >> 8) & 0x0000FF00);
3157 case 24: buf1
[1] = (buf1
[1] & 0x0000FFFF) | ((buf1
[1] << 8) & 0xFF000000) | ((buf1
[1] >> 8) & 0x00FF0000);
3159 case 25: buf1
[2] = (buf1
[1] & 0xFF000000) | buf1
[2];
3160 buf1
[1] = (buf1
[1] & 0x00FFFFFF) | (buf1
[2] << 24);
3161 buf1
[2] = (buf1
[2] >> 24);
3163 case 26: buf1
[2] = ((buf1
[2] << 8) & 0x0000FF00) | ((buf1
[2] >> 8) & 0x000000FF);
3165 case 27: buf1
[2] = (buf1
[2] & 0x000000FF) | ((buf1
[2] << 8) & 0x00FF0000) | ((buf1
[2] >> 8) & 0x0000FF00);
3167 case 28: buf1
[2] = (buf1
[2] & 0x0000FFFF) | ((buf1
[2] << 8) & 0xFF000000) | ((buf1
[2] >> 8) & 0x00FF0000);
3169 case 29: buf1
[3] = (buf1
[2] & 0xFF000000) | buf1
[3];
3170 buf1
[2] = (buf1
[2] & 0x00FFFFFF) | (buf1
[3] << 24);
3171 buf1
[3] = (buf1
[3] >> 24);
3173 case 30: buf1
[3] = ((buf1
[3] << 8) & 0x0000FF00) | ((buf1
[3] >> 8) & 0x000000FF);
3175 case 31: buf1
[3] = (buf1
[3] & 0x000000FF) | ((buf1
[3] << 8) & 0x00FF0000) | ((buf1
[3] >> 8) & 0x0000FF00);
3183 static u32
rule_op_mangle_switch_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3185 if (p0
>= in_len
) return (in_len
);
3186 if (p1
>= in_len
) return (in_len
);
3194 case 0: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6540);
3196 case 1: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6541);
3198 case 2: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6542);
3200 case 3: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6543);
3202 case 4: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6540);
3204 case 5: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6541);
3206 case 6: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6542);
3208 case 7: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6543);
3210 case 8: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6540);
3212 case 9: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6541);
3214 case 10: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6542);
3216 case 11: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6543);
3218 case 12: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6540);
3220 case 13: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6541);
3222 case 14: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6542);
3224 case 15: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6543);
3226 case 16: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6540);
3228 case 17: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6541);
3230 case 18: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6542);
3232 case 19: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6543);
3234 case 20: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6540);
3236 case 21: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6541);
3238 case 22: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6542);
3240 case 23: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6543);
3242 case 24: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6540);
3244 case 25: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6541);
3246 case 26: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6542);
3248 case 27: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6543);
3250 case 28: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6540);
3252 case 29: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6541);
3254 case 30: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6542);
3256 case 31: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6543);
3262 case 0: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6540);
3263 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x7650);
3265 case 1: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6541);
3266 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x7604);
3268 case 2: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6542);
3269 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x7054);
3271 case 3: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6543);
3272 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x0654);
3274 case 4: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6540);
3275 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x7650);
3277 case 5: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6541);
3278 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x7604);
3280 case 6: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6542);
3281 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x7054);
3283 case 7: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6543);
3284 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x0654);
3286 case 8: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6540);
3287 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x7650);
3289 case 9: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6541);
3290 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x7604);
3292 case 10: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6542);
3293 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x7054);
3295 case 11: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6543);
3296 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x0654);
3298 case 12: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6540);
3299 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x7650);
3301 case 13: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6541);
3302 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x7604);
3304 case 14: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6542);
3305 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x7054);
3307 case 15: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6543);
3308 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x0654);
3310 case 16: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6540);
3311 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x7650);
3313 case 17: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6541);
3314 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x7604);
3316 case 18: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6542);
3317 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x7054);
3319 case 19: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6543);
3320 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x0654);
3322 case 20: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6540);
3323 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x7650);
3325 case 21: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6541);
3326 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x7604);
3328 case 22: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6542);
3329 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x7054);
3331 case 23: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6543);
3332 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x0654);
3334 case 24: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6540);
3335 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x7650);
3337 case 25: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6541);
3338 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x7604);
3340 case 26: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6542);
3341 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x7054);
3343 case 27: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6543);
3344 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x0654);
3346 case 28: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6540);
3347 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x7650);
3349 case 29: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6541);
3350 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x7604);
3352 case 30: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6542);
3353 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x7054);
3355 case 31: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6543);
3356 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x0654);
3362 case 0: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x7650);
3364 case 1: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x7604);
3366 case 2: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x7054);
3368 case 3: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x0654);
3370 case 4: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x7650);
3372 case 5: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x7604);
3374 case 6: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x7054);
3376 case 7: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x0654);
3378 case 8: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x7650);
3380 case 9: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x7604);
3382 case 10: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x7054);
3384 case 11: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x0654);
3386 case 12: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x7650);
3388 case 13: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x7604);
3390 case 14: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x7054);
3392 case 15: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x0654);
3394 case 16: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x7650);
3396 case 17: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x7604);
3398 case 18: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x7054);
3400 case 19: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x0654);
3402 case 20: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x7650);
3404 case 21: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x7604);
3406 case 22: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x7054);
3408 case 23: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x0654);
3410 case 24: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x7650);
3412 case 25: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x7604);
3414 case 26: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x7054);
3416 case 27: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x0654);
3418 case 28: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x7650);
3420 case 29: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x7604);
3422 case 30: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x7054);
3424 case 31: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x0654);
3429 #if defined IS_AMD || defined IS_GENERIC
3432 case 0: tmp0
= (buf0
[0] >> 0) & 0xFF;
3434 case 1: tmp0
= (buf0
[0] >> 8) & 0xFF;
3436 case 2: tmp0
= (buf0
[0] >> 16) & 0xFF;
3438 case 3: tmp0
= (buf0
[0] >> 24) & 0xFF;
3440 case 4: tmp0
= (buf0
[1] >> 0) & 0xFF;
3442 case 5: tmp0
= (buf0
[1] >> 8) & 0xFF;
3444 case 6: tmp0
= (buf0
[1] >> 16) & 0xFF;
3446 case 7: tmp0
= (buf0
[1] >> 24) & 0xFF;
3448 case 8: tmp0
= (buf0
[2] >> 0) & 0xFF;
3450 case 9: tmp0
= (buf0
[2] >> 8) & 0xFF;
3452 case 10: tmp0
= (buf0
[2] >> 16) & 0xFF;
3454 case 11: tmp0
= (buf0
[2] >> 24) & 0xFF;
3456 case 12: tmp0
= (buf0
[3] >> 0) & 0xFF;
3458 case 13: tmp0
= (buf0
[3] >> 8) & 0xFF;
3460 case 14: tmp0
= (buf0
[3] >> 16) & 0xFF;
3462 case 15: tmp0
= (buf0
[3] >> 24) & 0xFF;
3464 case 16: tmp0
= (buf1
[0] >> 0) & 0xFF;
3466 case 17: tmp0
= (buf1
[0] >> 8) & 0xFF;
3468 case 18: tmp0
= (buf1
[0] >> 16) & 0xFF;
3470 case 19: tmp0
= (buf1
[0] >> 24) & 0xFF;
3472 case 20: tmp0
= (buf1
[1] >> 0) & 0xFF;
3474 case 21: tmp0
= (buf1
[1] >> 8) & 0xFF;
3476 case 22: tmp0
= (buf1
[1] >> 16) & 0xFF;
3478 case 23: tmp0
= (buf1
[1] >> 24) & 0xFF;
3480 case 24: tmp0
= (buf1
[2] >> 0) & 0xFF;
3482 case 25: tmp0
= (buf1
[2] >> 8) & 0xFF;
3484 case 26: tmp0
= (buf1
[2] >> 16) & 0xFF;
3486 case 27: tmp0
= (buf1
[2] >> 24) & 0xFF;
3488 case 28: tmp0
= (buf1
[3] >> 0) & 0xFF;
3490 case 29: tmp0
= (buf1
[3] >> 8) & 0xFF;
3492 case 30: tmp0
= (buf1
[3] >> 16) & 0xFF;
3494 case 31: tmp0
= (buf1
[3] >> 24) & 0xFF;
3500 case 0: tmp1
= (buf0
[0] >> 0) & 0xff;
3501 buf0
[0] = (buf0
[0] & 0xffffff00) | tmp0
<< 0;
3503 case 1: tmp1
= (buf0
[0] >> 8) & 0xff;
3504 buf0
[0] = (buf0
[0] & 0xffff00ff) | tmp0
<< 8;
3506 case 2: tmp1
= (buf0
[0] >> 16) & 0xff;
3507 buf0
[0] = (buf0
[0] & 0xff00ffff) | tmp0
<< 16;
3509 case 3: tmp1
= (buf0
[0] >> 24) & 0xff;
3510 buf0
[0] = (buf0
[0] & 0x00ffffff) | tmp0
<< 24;
3512 case 4: tmp1
= (buf0
[1] >> 0) & 0xff;
3513 buf0
[1] = (buf0
[1] & 0xffffff00) | tmp0
<< 0;
3515 case 5: tmp1
= (buf0
[1] >> 8) & 0xff;
3516 buf0
[1] = (buf0
[1] & 0xffff00ff) | tmp0
<< 8;
3518 case 6: tmp1
= (buf0
[1] >> 16) & 0xff;
3519 buf0
[1] = (buf0
[1] & 0xff00ffff) | tmp0
<< 16;
3521 case 7: tmp1
= (buf0
[1] >> 24) & 0xff;
3522 buf0
[1] = (buf0
[1] & 0x00ffffff) | tmp0
<< 24;
3524 case 8: tmp1
= (buf0
[2] >> 0) & 0xff;
3525 buf0
[2] = (buf0
[2] & 0xffffff00) | tmp0
<< 0;
3527 case 9: tmp1
= (buf0
[2] >> 8) & 0xff;
3528 buf0
[2] = (buf0
[2] & 0xffff00ff) | tmp0
<< 8;
3530 case 10: tmp1
= (buf0
[2] >> 16) & 0xff;
3531 buf0
[2] = (buf0
[2] & 0xff00ffff) | tmp0
<< 16;
3533 case 11: tmp1
= (buf0
[2] >> 24) & 0xff;
3534 buf0
[2] = (buf0
[2] & 0x00ffffff) | tmp0
<< 24;
3536 case 12: tmp1
= (buf0
[3] >> 0) & 0xff;
3537 buf0
[3] = (buf0
[3] & 0xffffff00) | tmp0
<< 0;
3539 case 13: tmp1
= (buf0
[3] >> 8) & 0xff;
3540 buf0
[3] = (buf0
[3] & 0xffff00ff) | tmp0
<< 8;
3542 case 14: tmp1
= (buf0
[3] >> 16) & 0xff;
3543 buf0
[3] = (buf0
[3] & 0xff00ffff) | tmp0
<< 16;
3545 case 15: tmp1
= (buf0
[3] >> 24) & 0xff;
3546 buf0
[3] = (buf0
[3] & 0x00ffffff) | tmp0
<< 24;
3548 case 16: tmp1
= (buf1
[0] >> 0) & 0xff;
3549 buf1
[0] = (buf1
[0] & 0xffffff00) | tmp0
<< 0;
3551 case 17: tmp1
= (buf1
[0] >> 8) & 0xff;
3552 buf1
[0] = (buf1
[0] & 0xffff00ff) | tmp0
<< 8;
3554 case 18: tmp1
= (buf1
[0] >> 16) & 0xff;
3555 buf1
[0] = (buf1
[0] & 0xff00ffff) | tmp0
<< 16;
3557 case 19: tmp1
= (buf1
[0] >> 24) & 0xff;
3558 buf1
[0] = (buf1
[0] & 0x00ffffff) | tmp0
<< 24;
3560 case 20: tmp1
= (buf1
[1] >> 0) & 0xff;
3561 buf1
[1] = (buf1
[1] & 0xffffff00) | tmp0
<< 0;
3563 case 21: tmp1
= (buf1
[1] >> 8) & 0xff;
3564 buf1
[1] = (buf1
[1] & 0xffff00ff) | tmp0
<< 8;
3566 case 22: tmp1
= (buf1
[1] >> 16) & 0xff;
3567 buf1
[1] = (buf1
[1] & 0xff00ffff) | tmp0
<< 16;
3569 case 23: tmp1
= (buf1
[1] >> 24) & 0xff;
3570 buf1
[1] = (buf1
[1] & 0x00ffffff) | tmp0
<< 24;
3572 case 24: tmp1
= (buf1
[2] >> 0) & 0xff;
3573 buf1
[2] = (buf1
[2] & 0xffffff00) | tmp0
<< 0;
3575 case 25: tmp1
= (buf1
[2] >> 8) & 0xff;
3576 buf1
[2] = (buf1
[2] & 0xffff00ff) | tmp0
<< 8;
3578 case 26: tmp1
= (buf1
[2] >> 16) & 0xff;
3579 buf1
[2] = (buf1
[2] & 0xff00ffff) | tmp0
<< 16;
3581 case 27: tmp1
= (buf1
[2] >> 24) & 0xff;
3582 buf1
[2] = (buf1
[2] & 0x00ffffff) | tmp0
<< 24;
3584 case 28: tmp1
= (buf1
[3] >> 0) & 0xff;
3585 buf1
[3] = (buf1
[3] & 0xffffff00) | tmp0
<< 0;
3587 case 29: tmp1
= (buf1
[3] >> 8) & 0xff;
3588 buf1
[3] = (buf1
[3] & 0xffff00ff) | tmp0
<< 8;
3590 case 30: tmp1
= (buf1
[3] >> 16) & 0xff;
3591 buf1
[3] = (buf1
[3] & 0xff00ffff) | tmp0
<< 16;
3593 case 31: tmp1
= (buf1
[3] >> 24) & 0xff;
3594 buf1
[3] = (buf1
[3] & 0x00ffffff) | tmp0
<< 24;
3600 case 0: buf0
[0] = (buf0
[0] & 0xffffff00) | tmp1
<< 0;
3602 case 1: buf0
[0] = (buf0
[0] & 0xffff00ff) | tmp1
<< 8;
3604 case 2: buf0
[0] = (buf0
[0] & 0xff00ffff) | tmp1
<< 16;
3606 case 3: buf0
[0] = (buf0
[0] & 0x00ffffff) | tmp1
<< 24;
3608 case 4: buf0
[1] = (buf0
[1] & 0xffffff00) | tmp1
<< 0;
3610 case 5: buf0
[1] = (buf0
[1] & 0xffff00ff) | tmp1
<< 8;
3612 case 6: buf0
[1] = (buf0
[1] & 0xff00ffff) | tmp1
<< 16;
3614 case 7: buf0
[1] = (buf0
[1] & 0x00ffffff) | tmp1
<< 24;
3616 case 8: buf0
[2] = (buf0
[2] & 0xffffff00) | tmp1
<< 0;
3618 case 9: buf0
[2] = (buf0
[2] & 0xffff00ff) | tmp1
<< 8;
3620 case 10: buf0
[2] = (buf0
[2] & 0xff00ffff) | tmp1
<< 16;
3622 case 11: buf0
[2] = (buf0
[2] & 0x00ffffff) | tmp1
<< 24;
3624 case 12: buf0
[3] = (buf0
[3] & 0xffffff00) | tmp1
<< 0;
3626 case 13: buf0
[3] = (buf0
[3] & 0xffff00ff) | tmp1
<< 8;
3628 case 14: buf0
[3] = (buf0
[3] & 0xff00ffff) | tmp1
<< 16;
3630 case 15: buf0
[3] = (buf0
[3] & 0x00ffffff) | tmp1
<< 24;
3632 case 16: buf1
[0] = (buf1
[0] & 0xffffff00) | tmp1
<< 0;
3634 case 17: buf1
[0] = (buf1
[0] & 0xffff00ff) | tmp1
<< 8;
3636 case 18: buf1
[0] = (buf1
[0] & 0xff00ffff) | tmp1
<< 16;
3638 case 19: buf1
[0] = (buf1
[0] & 0x00ffffff) | tmp1
<< 24;
3640 case 20: buf1
[1] = (buf1
[1] & 0xffffff00) | tmp1
<< 0;
3642 case 21: buf1
[1] = (buf1
[1] & 0xffff00ff) | tmp1
<< 8;
3644 case 22: buf1
[1] = (buf1
[1] & 0xff00ffff) | tmp1
<< 16;
3646 case 23: buf1
[1] = (buf1
[1] & 0x00ffffff) | tmp1
<< 24;
3648 case 24: buf1
[2] = (buf1
[2] & 0xffffff00) | tmp1
<< 0;
3650 case 25: buf1
[2] = (buf1
[2] & 0xffff00ff) | tmp1
<< 8;
3652 case 26: buf1
[2] = (buf1
[2] & 0xff00ffff) | tmp1
<< 16;
3654 case 27: buf1
[2] = (buf1
[2] & 0x00ffffff) | tmp1
<< 24;
3656 case 28: buf1
[3] = (buf1
[3] & 0xffffff00) | tmp1
<< 0;
3658 case 29: buf1
[3] = (buf1
[3] & 0xffff00ff) | tmp1
<< 8;
3660 case 30: buf1
[3] = (buf1
[3] & 0xff00ffff) | tmp1
<< 16;
3662 case 31: buf1
[3] = (buf1
[3] & 0x00ffffff) | tmp1
<< 24;
3670 static u32
rule_op_mangle_chr_shiftl (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3672 if (p0
>= in_len
) return (in_len
);
3674 const u32 mr
= 0xffu
<< ((p0
& 3) * 8);
3679 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) << 1) & mr
); break;
3680 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) << 1) & mr
); break;
3681 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) << 1) & mr
); break;
3682 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) << 1) & mr
); break;
3683 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) << 1) & mr
); break;
3684 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) << 1) & mr
); break;
3685 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) << 1) & mr
); break;
3686 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) << 1) & mr
); break;
3692 static u32
rule_op_mangle_chr_shiftr (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3694 if (p0
>= in_len
) return (in_len
);
3696 const u32 mr
= 0xffu
<< ((p0
& 3) * 8);
3701 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) >> 1) & mr
); break;
3702 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) >> 1) & mr
); break;
3703 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) >> 1) & mr
); break;
3704 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) >> 1) & mr
); break;
3705 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) >> 1) & mr
); break;
3706 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) >> 1) & mr
); break;
3707 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) >> 1) & mr
); break;
3708 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) >> 1) & mr
); break;
3714 static u32
rule_op_mangle_chr_incr (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3716 if (p0
>= in_len
) return (in_len
);
3718 const u32 mr
= 0xffu
<< ((p0
& 3) * 8);
3721 const u32 n
= 0x01010101 & mr
;
3725 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) + n
) & mr
); break;
3726 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) + n
) & mr
); break;
3727 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) + n
) & mr
); break;
3728 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) + n
) & mr
); break;
3729 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) + n
) & mr
); break;
3730 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) + n
) & mr
); break;
3731 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) + n
) & mr
); break;
3732 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) + n
) & mr
); break;
3738 static u32
rule_op_mangle_chr_decr (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3740 if (p0
>= in_len
) return (in_len
);
3742 const u32 mr
= 0xffu
<< ((p0
& 3) * 8);
3745 const u32 n
= 0x01010101 & mr
;
3749 case 0: buf0
[0] = (buf0
[0] & ml
) | (((buf0
[0] & mr
) - n
) & mr
); break;
3750 case 1: buf0
[1] = (buf0
[1] & ml
) | (((buf0
[1] & mr
) - n
) & mr
); break;
3751 case 2: buf0
[2] = (buf0
[2] & ml
) | (((buf0
[2] & mr
) - n
) & mr
); break;
3752 case 3: buf0
[3] = (buf0
[3] & ml
) | (((buf0
[3] & mr
) - n
) & mr
); break;
3753 case 4: buf1
[0] = (buf1
[0] & ml
) | (((buf1
[0] & mr
) - n
) & mr
); break;
3754 case 5: buf1
[1] = (buf1
[1] & ml
) | (((buf1
[1] & mr
) - n
) & mr
); break;
3755 case 6: buf1
[2] = (buf1
[2] & ml
) | (((buf1
[2] & mr
) - n
) & mr
); break;
3756 case 7: buf1
[3] = (buf1
[3] & ml
) | (((buf1
[3] & mr
) - n
) & mr
); break;
3762 static u32
rule_op_mangle_replace_np1 (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3764 if ((p0
+ 1) >= in_len
) return (in_len
);
3769 lshift_block (buf0
, buf1
, tib40
, tib41
);
3771 const u32 mr
= 0xffu
<< ((p0
& 3) * 8);
3776 case 0: buf0
[0] = (buf0
[0] & ml
) | (tib40
[0] & mr
); break;
3777 case 1: buf0
[1] = (buf0
[1] & ml
) | (tib40
[1] & mr
); break;
3778 case 2: buf0
[2] = (buf0
[2] & ml
) | (tib40
[2] & mr
); break;
3779 case 3: buf0
[3] = (buf0
[3] & ml
) | (tib40
[3] & mr
); break;
3780 case 4: buf1
[0] = (buf1
[0] & ml
) | (tib41
[0] & mr
); break;
3781 case 5: buf1
[1] = (buf1
[1] & ml
) | (tib41
[1] & mr
); break;
3782 case 6: buf1
[2] = (buf1
[2] & ml
) | (tib41
[2] & mr
); break;
3783 case 7: buf1
[3] = (buf1
[3] & ml
) | (tib41
[3] & mr
); break;
3789 static u32
rule_op_mangle_replace_nm1 (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3791 if (p0
== 0) return (in_len
);
3793 if (p0
>= in_len
) return (in_len
);
3798 rshift_block (buf0
, buf1
, tib40
, tib41
);
3800 const u32 mr
= 0xffu
<< ((p0
& 3) * 8);
3805 case 0: buf0
[0] = (buf0
[0] & ml
) | (tib40
[0] & mr
); break;
3806 case 1: buf0
[1] = (buf0
[1] & ml
) | (tib40
[1] & mr
); break;
3807 case 2: buf0
[2] = (buf0
[2] & ml
) | (tib40
[2] & mr
); break;
3808 case 3: buf0
[3] = (buf0
[3] & ml
) | (tib40
[3] & mr
); break;
3809 case 4: buf1
[0] = (buf1
[0] & ml
) | (tib41
[0] & mr
); break;
3810 case 5: buf1
[1] = (buf1
[1] & ml
) | (tib41
[1] & mr
); break;
3811 case 6: buf1
[2] = (buf1
[2] & ml
) | (tib41
[2] & mr
); break;
3812 case 7: buf1
[3] = (buf1
[3] & ml
) | (tib41
[3] & mr
); break;
3818 static u32
rule_op_mangle_dupeblock_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3820 if (p0
> in_len
) return (in_len
);
3822 if ((in_len
+ p0
) >= 32) return (in_len
);
3824 u32 out_len
= in_len
;
3838 truncate_right (tib40
, tib41
, p0
);
3840 rshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
3842 buf0
[0] |= tib40
[0];
3843 buf0
[1] |= tib40
[1];
3844 buf0
[2] |= tib40
[2];
3845 buf0
[3] |= tib40
[3];
3846 buf1
[0] |= tib41
[0];
3847 buf1
[1] |= tib41
[1];
3848 buf1
[2] |= tib41
[2];
3849 buf1
[3] |= tib41
[3];
3856 static u32
rule_op_mangle_dupeblock_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3858 if (p0
> in_len
) return (in_len
);
3860 if ((in_len
+ p0
) >= 32) return (in_len
);
3862 u32 out_len
= in_len
;
3867 rshift_block_N (buf0
, buf1
, tib40
, tib41
, p0
);
3869 truncate_left (tib40
, tib41
, out_len
);
3871 buf0
[0] |= tib40
[0];
3872 buf0
[1] |= tib40
[1];
3873 buf0
[2] |= tib40
[2];
3874 buf0
[3] |= tib40
[3];
3875 buf1
[0] |= tib41
[0];
3876 buf1
[1] |= tib41
[1];
3877 buf1
[2] |= tib41
[2];
3878 buf1
[3] |= tib41
[3];
3885 static u32
rule_op_mangle_title (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3887 buf0
[0] |= (generate_cmask (buf0
[0]));
3888 buf0
[1] |= (generate_cmask (buf0
[1]));
3889 buf0
[2] |= (generate_cmask (buf0
[2]));
3890 buf0
[3] |= (generate_cmask (buf0
[3]));
3891 buf1
[0] |= (generate_cmask (buf1
[0]));
3892 buf1
[1] |= (generate_cmask (buf1
[1]));
3893 buf1
[2] |= (generate_cmask (buf1
[2]));
3894 buf1
[3] |= (generate_cmask (buf1
[3]));
3897 buf0
[0] &= ~(0x00000020 & generate_cmask (buf0
[0]));
3899 for (u32 i
= 0; i
< in_len
; i
++)
3906 case 0: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6540);
3907 tmp1
= ~(0x00002000 & generate_cmask (buf0
[0])); break;
3908 case 1: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6541);
3909 tmp1
= ~(0x00200000 & generate_cmask (buf0
[0])); break;
3910 case 2: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6542);
3911 tmp1
= ~(0x20000000 & generate_cmask (buf0
[0])); break;
3912 case 3: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6543);
3913 tmp1
= ~(0x00000020 & generate_cmask (buf0
[1])); break;
3914 case 4: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6540);
3915 tmp1
= ~(0x00002000 & generate_cmask (buf0
[1])); break;
3916 case 5: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6541);
3917 tmp1
= ~(0x00200000 & generate_cmask (buf0
[1])); break;
3918 case 6: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6542);
3919 tmp1
= ~(0x20000000 & generate_cmask (buf0
[1])); break;
3920 case 7: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6543);
3921 tmp1
= ~(0x00000020 & generate_cmask (buf0
[2])); break;
3922 case 8: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6540);
3923 tmp1
= ~(0x00002000 & generate_cmask (buf0
[2])); break;
3924 case 9: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6541);
3925 tmp1
= ~(0x00200000 & generate_cmask (buf0
[2])); break;
3926 case 10: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6542);
3927 tmp1
= ~(0x20000000 & generate_cmask (buf0
[2])); break;
3928 case 11: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6543);
3929 tmp1
= ~(0x00000020 & generate_cmask (buf0
[3])); break;
3930 case 12: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6540);
3931 tmp1
= ~(0x00002000 & generate_cmask (buf0
[3])); break;
3932 case 13: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6541);
3933 tmp1
= ~(0x00200000 & generate_cmask (buf0
[3])); break;
3934 case 14: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6542);
3935 tmp1
= ~(0x20000000 & generate_cmask (buf0
[3])); break;
3936 case 15: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6543);
3937 tmp1
= ~(0x00000020 & generate_cmask (buf1
[0])); break;
3938 case 16: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6540);
3939 tmp1
= ~(0x00002000 & generate_cmask (buf1
[0])); break;
3940 case 17: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6541);
3941 tmp1
= ~(0x00200000 & generate_cmask (buf1
[0])); break;
3942 case 18: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6542);
3943 tmp1
= ~(0x20000000 & generate_cmask (buf1
[0])); break;
3944 case 19: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6543);
3945 tmp1
= ~(0x00000020 & generate_cmask (buf1
[1])); break;
3946 case 20: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6540);
3947 tmp1
= ~(0x00002000 & generate_cmask (buf1
[1])); break;
3948 case 21: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6541);
3949 tmp1
= ~(0x00200000 & generate_cmask (buf1
[1])); break;
3950 case 22: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6542);
3951 tmp1
= ~(0x20000000 & generate_cmask (buf1
[1])); break;
3952 case 23: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6543);
3953 tmp1
= ~(0x00000020 & generate_cmask (buf1
[2])); break;
3954 case 24: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6540);
3955 tmp1
= ~(0x00002000 & generate_cmask (buf1
[2])); break;
3956 case 25: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6541);
3957 tmp1
= ~(0x00200000 & generate_cmask (buf1
[2])); break;
3958 case 26: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6542);
3959 tmp1
= ~(0x20000000 & generate_cmask (buf1
[2])); break;
3960 case 27: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6543);
3961 tmp1
= ~(0x00000020 & generate_cmask (buf1
[3])); break;
3962 case 28: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6540);
3963 tmp1
= ~(0x00002000 & generate_cmask (buf1
[3])); break;
3964 case 29: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6541);
3965 tmp1
= ~(0x00200000 & generate_cmask (buf1
[3])); break;
3966 case 30: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6542);
3967 tmp1
= ~(0x20000000 & generate_cmask (buf1
[3])); break;
3972 if (tmp0
== ' ') buf0
[0] &= tmp1
;
3976 if (tmp0
== ' ') buf0
[1] &= tmp1
;
3980 if (tmp0
== ' ') buf0
[2] &= tmp1
;
3984 if (tmp0
== ' ') buf0
[3] &= tmp1
;
3988 if (tmp0
== ' ') buf1
[0] &= tmp1
;
3992 if (tmp0
== ' ') buf1
[1] &= tmp1
;
3996 if (tmp0
== ' ') buf1
[2] &= tmp1
;
4000 if (tmp0
== ' ') buf1
[3] &= tmp1
;
4005 #if defined IS_AMD || defined IS_GENERIC
4009 const uchar4 tmp0
= (uchar4
) (' ');
4010 const uchar4 tmp1
= (uchar4
) (0x00);
4011 const uchar4 tmp2
= (uchar4
) (0xff);
4015 tmp
= as_uchar4 (buf0
[0]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[0] = as_uint (tmp
);
4016 tmp
= as_uchar4 (buf0
[1]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[1] = as_uint (tmp
);
4017 tmp
= as_uchar4 (buf0
[2]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[2] = as_uint (tmp
);
4018 tmp
= as_uchar4 (buf0
[3]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[3] = as_uint (tmp
);
4019 tmp
= as_uchar4 (buf1
[0]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[0] = as_uint (tmp
);
4020 tmp
= as_uchar4 (buf1
[1]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[1] = as_uint (tmp
);
4021 tmp
= as_uchar4 (buf1
[2]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[2] = as_uint (tmp
);
4022 tmp
= as_uchar4 (buf1
[3]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[3] = as_uint (tmp
);
4024 rshift_block (tib40
, tib41
, tib40
, tib41
); tib40
[0] |= 0xff;
4026 buf0
[0] &= ~(generate_cmask (buf0
[0]) & tib40
[0]);
4027 buf0
[1] &= ~(generate_cmask (buf0
[1]) & tib40
[1]);
4028 buf0
[2] &= ~(generate_cmask (buf0
[2]) & tib40
[2]);
4029 buf0
[3] &= ~(generate_cmask (buf0
[3]) & tib40
[3]);
4030 buf1
[0] &= ~(generate_cmask (buf1
[0]) & tib41
[0]);
4031 buf1
[1] &= ~(generate_cmask (buf1
[1]) & tib41
[1]);
4032 buf1
[2] &= ~(generate_cmask (buf1
[2]) & tib41
[2]);
4033 buf1
[3] &= ~(generate_cmask (buf1
[3]) & tib41
[3]);
4039 u32
apply_rule (const u32 name
, const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
4041 u32 out_len
= in_len
;
4045 case RULE_OP_MANGLE_LREST
: out_len
= rule_op_mangle_lrest (p0
, p1
, buf0
, buf1
, out_len
); break;
4046 case RULE_OP_MANGLE_UREST
: out_len
= rule_op_mangle_urest (p0
, p1
, buf0
, buf1
, out_len
); break;
4047 case RULE_OP_MANGLE_LREST_UFIRST
: out_len
= rule_op_mangle_lrest_ufirst (p0
, p1
, buf0
, buf1
, out_len
); break;
4048 case RULE_OP_MANGLE_UREST_LFIRST
: out_len
= rule_op_mangle_urest_lfirst (p0
, p1
, buf0
, buf1
, out_len
); break;
4049 case RULE_OP_MANGLE_TREST
: out_len
= rule_op_mangle_trest (p0
, p1
, buf0
, buf1
, out_len
); break;
4050 case RULE_OP_MANGLE_TOGGLE_AT
: out_len
= rule_op_mangle_toggle_at (p0
, p1
, buf0
, buf1
, out_len
); break;
4051 case RULE_OP_MANGLE_REVERSE
: out_len
= rule_op_mangle_reverse (p0
, p1
, buf0
, buf1
, out_len
); break;
4052 case RULE_OP_MANGLE_DUPEWORD
: out_len
= rule_op_mangle_dupeword (p0
, p1
, buf0
, buf1
, out_len
); break;
4053 case RULE_OP_MANGLE_DUPEWORD_TIMES
: out_len
= rule_op_mangle_dupeword_times (p0
, p1
, buf0
, buf1
, out_len
); break;
4054 case RULE_OP_MANGLE_REFLECT
: out_len
= rule_op_mangle_reflect (p0
, p1
, buf0
, buf1
, out_len
); break;
4055 case RULE_OP_MANGLE_APPEND
: out_len
= rule_op_mangle_append (p0
, p1
, buf0
, buf1
, out_len
); break;
4056 case RULE_OP_MANGLE_PREPEND
: out_len
= rule_op_mangle_prepend (p0
, p1
, buf0
, buf1
, out_len
); break;
4057 case RULE_OP_MANGLE_ROTATE_LEFT
: out_len
= rule_op_mangle_rotate_left (p0
, p1
, buf0
, buf1
, out_len
); break;
4058 case RULE_OP_MANGLE_ROTATE_RIGHT
: out_len
= rule_op_mangle_rotate_right (p0
, p1
, buf0
, buf1
, out_len
); break;
4059 case RULE_OP_MANGLE_DELETE_FIRST
: out_len
= rule_op_mangle_delete_first (p0
, p1
, buf0
, buf1
, out_len
); break;
4060 case RULE_OP_MANGLE_DELETE_LAST
: out_len
= rule_op_mangle_delete_last (p0
, p1
, buf0
, buf1
, out_len
); break;
4061 case RULE_OP_MANGLE_DELETE_AT
: out_len
= rule_op_mangle_delete_at (p0
, p1
, buf0
, buf1
, out_len
); break;
4062 case RULE_OP_MANGLE_EXTRACT
: out_len
= rule_op_mangle_extract (p0
, p1
, buf0
, buf1
, out_len
); break;
4063 case RULE_OP_MANGLE_OMIT
: out_len
= rule_op_mangle_omit (p0
, p1
, buf0
, buf1
, out_len
); break;
4064 case RULE_OP_MANGLE_INSERT
: out_len
= rule_op_mangle_insert (p0
, p1
, buf0
, buf1
, out_len
); break;
4065 case RULE_OP_MANGLE_OVERSTRIKE
: out_len
= rule_op_mangle_overstrike (p0
, p1
, buf0
, buf1
, out_len
); break;
4066 case RULE_OP_MANGLE_TRUNCATE_AT
: out_len
= rule_op_mangle_truncate_at (p0
, p1
, buf0
, buf1
, out_len
); break;
4067 case RULE_OP_MANGLE_REPLACE
: out_len
= rule_op_mangle_replace (p0
, p1
, buf0
, buf1
, out_len
); break;
4068 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
4069 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
4070 case RULE_OP_MANGLE_DUPECHAR_FIRST
: out_len
= rule_op_mangle_dupechar_first (p0
, p1
, buf0
, buf1
, out_len
); break;
4071 case RULE_OP_MANGLE_DUPECHAR_LAST
: out_len
= rule_op_mangle_dupechar_last (p0
, p1
, buf0
, buf1
, out_len
); break;
4072 case RULE_OP_MANGLE_DUPECHAR_ALL
: out_len
= rule_op_mangle_dupechar_all (p0
, p1
, buf0
, buf1
, out_len
); break;
4073 case RULE_OP_MANGLE_SWITCH_FIRST
: out_len
= rule_op_mangle_switch_first (p0
, p1
, buf0
, buf1
, out_len
); break;
4074 case RULE_OP_MANGLE_SWITCH_LAST
: out_len
= rule_op_mangle_switch_last (p0
, p1
, buf0
, buf1
, out_len
); break;
4075 case RULE_OP_MANGLE_SWITCH_AT
: out_len
= rule_op_mangle_switch_at (p0
, p1
, buf0
, buf1
, out_len
); break;
4076 case RULE_OP_MANGLE_CHR_SHIFTL
: out_len
= rule_op_mangle_chr_shiftl (p0
, p1
, buf0
, buf1
, out_len
); break;
4077 case RULE_OP_MANGLE_CHR_SHIFTR
: out_len
= rule_op_mangle_chr_shiftr (p0
, p1
, buf0
, buf1
, out_len
); break;
4078 case RULE_OP_MANGLE_CHR_INCR
: out_len
= rule_op_mangle_chr_incr (p0
, p1
, buf0
, buf1
, out_len
); break;
4079 case RULE_OP_MANGLE_CHR_DECR
: out_len
= rule_op_mangle_chr_decr (p0
, p1
, buf0
, buf1
, out_len
); break;
4080 case RULE_OP_MANGLE_REPLACE_NP1
: out_len
= rule_op_mangle_replace_np1 (p0
, p1
, buf0
, buf1
, out_len
); break;
4081 case RULE_OP_MANGLE_REPLACE_NM1
: out_len
= rule_op_mangle_replace_nm1 (p0
, p1
, buf0
, buf1
, out_len
); break;
4082 case RULE_OP_MANGLE_DUPEBLOCK_FIRST
: out_len
= rule_op_mangle_dupeblock_first (p0
, p1
, buf0
, buf1
, out_len
); break;
4083 case RULE_OP_MANGLE_DUPEBLOCK_LAST
: out_len
= rule_op_mangle_dupeblock_last (p0
, p1
, buf0
, buf1
, out_len
); break;
4084 case RULE_OP_MANGLE_TITLE
: out_len
= rule_op_mangle_title (p0
, p1
, buf0
, buf1
, out_len
); break;
4090 u32
apply_rules (const __global u32
*cmds
, u32 buf0
[4], u32 buf1
[4], const u32 len
)
4094 for (u32 i
= 0; cmds
[i
] != 0; i
++)
4096 const u32 cmd
= cmds
[i
];
4098 const u32 name
= (cmd
>> 0) & 0xff;
4099 const u32 p0
= (cmd
>> 8) & 0xff;
4100 const u32 p1
= (cmd
>> 16) & 0xff;
4102 out_len
= apply_rule (name
, p0
, p1
, buf0
, buf1
, out_len
);
4108 u32
apply_rules_vect (const u32 pw_buf0
[4], const u32 pw_buf1
[4], const u32 pw_len
, const __global kernel_rule_t
*rules_buf
, const u32 il_pos
, u32x w0
[4], u32x w1
[4])
4121 return apply_rules (rules_buf
[il_pos
].cmds
, w0
, w1
, pw_len
);
4128 for (int i
= 0; i
< VECT_SIZE
; i
++)
4133 tmp0
[0] = pw_buf0
[0];
4134 tmp0
[1] = pw_buf0
[1];
4135 tmp0
[2] = pw_buf0
[2];
4136 tmp0
[3] = pw_buf0
[3];
4137 tmp1
[0] = pw_buf1
[0];
4138 tmp1
[1] = pw_buf1
[1];
4139 tmp1
[2] = pw_buf1
[2];
4140 tmp1
[3] = pw_buf1
[3];
4142 out_len
= apply_rules (rules_buf
[il_pos
+ i
].cmds
, tmp0
, tmp1
, pw_len
);
4144 // it's guaranteed to have out_len always the same for each call in the loop