2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
8 u32
apply_rule (const u32 name
, const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
);
9 u32
apply_rules (const __global u32
*cmds
, u32 buf0
[4], u32 buf1
[4], const u32 len
);
10 u32x
apply_rules_vect (const u32 pw_buf0
[4], const u32 pw_buf1
[4], const u32 pw_len
, const __global kernel_rule_t
*rules_buf
, const u32 il_pos
, u32x w0
[4], u32x w1
[4]);
/*
 * Build a per-byte mask for the four bytes packed into buf.
 *
 * The result has 0x20 in every byte lane whose input byte lies in
 * 0x41..0x5a or 0x61..0x7a, and 0x00 in every other lane.
 *
 * All four lanes are evaluated at once with plain word arithmetic. The two
 * additions below cannot carry into a neighbouring lane, because each lane
 * holds at most 0x1f + 0x1f before the add.
 */
static u32 generate_cmask (u32 buf)
{
  /* low five bits of each byte: position within a 32-value column */
  const u32 low5 = buf & 0x1f1f1f1f;

  /* bit 6 and bit 7 of each byte, both moved down onto bit 5 */
  const u32 bit6_on_5 = (buf & 0x40404040) >> 1;
  const u32 bit7_on_5 = (buf & 0x80808080) >> 2;

  /* bit 5 becomes set once low5 >= 27, i.e. past the 26th position */
  const u32 past_last = low5 + 0x05050505;

  /* bit 5 becomes set once low5 >= 1, i.e. not the 0th position */
  const u32 not_first = low5 + 0x1f1f1f1f;

  /* keep lanes with bit 6 set, bit 7 clear and low5 within 1..26 */
  return bit6_on_5 & ~bit7_on_5 & ~past_last & not_first;
}
23 static void truncate_right (u32 w0
[4], u32 w1
[4], const u32 len
)
25 const u32 tmp
= (1 << ((len
% 4) * 8)) - 1;
76 static void truncate_left (u32 w0
[4], u32 w1
[4], const u32 len
)
78 const u32 tmp
= ~((1 << ((len
% 4) * 8)) - 1);
/*
 * lshift_block: one-step shift of an eight-word block.
 *
 * in0[0..3] followed by in1[0..3] is read as eight consecutive 32-bit words;
 * out0[0..3] followed by out1[0..3] receives the eight result words.
 * Every result word i is built from input word i and input word i + 1; the
 * last result word pairs input word 7 with the constant 0.
 *
 * Results are written in ascending word order and each assignment reads only
 * the current and the next-higher input word, so the statement order stays
 * correct even when the output arrays are the same storage as the inputs.
 * NOTE(review): whether callers rely on that is not visible here.
 *
 * NOTE(review): __byte_perm_S and amd_bytealign_S are defined elsewhere.
 * Presumably both forms below yield (word[i] >> 8) | (word[i + 1] << 24),
 * i.e. the block moves one byte toward index 0 with zero fill at the end -
 * confirm against the helper definitions.
 */
129 static void lshift_block (const u32 in0
[4], const u32 in1
[4], u32 out0
[4], u32 out1
[4])
/*
 * Variant 1: byte-permute form, selector 0x4321, arguments (word i, word i + 1).
 * NOTE(review): the preprocessor line that guards this group is not visible
 * in this view - confirm which platform it is compiled for.
 */
132 out0
[0] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
133 out0
[1] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
134 out0
[2] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
135 out0
[3] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
136 out1
[0] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
137 out1
[1] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
138 out1
[2] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
139 out1
[3] = __byte_perm_S (in1
[3], 0, 0x4321);
/*
 * Variant 2: byte-align form, used when IS_AMD or IS_GENERIC is defined.
 * Same word pairing as variant 1, but the argument order is
 * (word i + 1, word i) and the byte count is 1.
 */
142 #if defined IS_AMD || defined IS_GENERIC
143 out0
[0] = amd_bytealign_S (in0
[1], in0
[0], 1);
144 out0
[1] = amd_bytealign_S (in0
[2], in0
[1], 1);
145 out0
[2] = amd_bytealign_S (in0
[3], in0
[2], 1);
146 out0
[3] = amd_bytealign_S (in1
[0], in0
[3], 1);
147 out1
[0] = amd_bytealign_S (in1
[1], in1
[0], 1);
148 out1
[1] = amd_bytealign_S (in1
[2], in1
[1], 1);
149 out1
[2] = amd_bytealign_S (in1
[3], in1
[2], 1);
150 out1
[3] = amd_bytealign_S ( 0, in1
[3], 1);
/*
 * rshift_block: one-step shift of an eight-word block, opposite direction
 * to lshift_block.
 *
 * in0[0..3] followed by in1[0..3] is read as eight consecutive 32-bit words;
 * out0[0..3] followed by out1[0..3] receives the eight result words.
 * Every result word i is built from input word i - 1 and input word i; the
 * first result word pairs the constant 0 with input word 0.
 *
 * Results are written in descending word order (out1[3] first, out0[0] last)
 * and each assignment reads only the current and the next-lower input word,
 * so the statement order stays correct even when the output arrays are the
 * same storage as the inputs.
 * NOTE(review): whether callers rely on that is not visible here.
 *
 * NOTE(review): __byte_perm_S and amd_bytealign_S are defined elsewhere.
 * Presumably both forms below yield (word[i - 1] >> 24) | (word[i] << 8),
 * i.e. the block moves one byte away from index 0 with zero fill at the
 * start - confirm against the helper definitions.
 */
154 static void rshift_block (const u32 in0
[4], const u32 in1
[4], u32 out0
[4], u32 out1
[4])
/*
 * Variant 1: byte-permute form, selector 0x6543, arguments (word i - 1, word i).
 * NOTE(review): the preprocessor line that guards this group is not visible
 * in this view - confirm which platform it is compiled for.
 */
157 out1
[3] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
158 out1
[2] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
159 out1
[1] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
160 out1
[0] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
161 out0
[3] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
162 out0
[2] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
163 out0
[1] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
164 out0
[0] = __byte_perm_S ( 0, in0
[0], 0x6543);
/*
 * Variant 2: byte-align form, used when IS_AMD or IS_GENERIC is defined.
 * Same word pairing as variant 1, but the argument order is
 * (word i, word i - 1) and the byte count is 3.
 */
167 #if defined IS_AMD || defined IS_GENERIC
168 out1
[3] = amd_bytealign_S (in1
[3], in1
[2], 3);
169 out1
[2] = amd_bytealign_S (in1
[2], in1
[1], 3);
170 out1
[1] = amd_bytealign_S (in1
[1], in1
[0], 3);
171 out1
[0] = amd_bytealign_S (in1
[0], in0
[3], 3);
172 out0
[3] = amd_bytealign_S (in0
[3], in0
[2], 3);
173 out0
[2] = amd_bytealign_S (in0
[2], in0
[1], 3);
174 out0
[1] = amd_bytealign_S (in0
[1], in0
[0], 3);
175 out0
[0] = amd_bytealign_S (in0
[0], 0, 3);
179 static void lshift_block_N (const u32 in0
[4], const u32 in1
[4], u32 out0
[4], u32 out1
[4], const u32 num
)
184 case 0: out0
[0] = in0
[0];
193 case 1: out0
[0] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
194 out0
[1] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
195 out0
[2] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
196 out0
[3] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
197 out1
[0] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
198 out1
[1] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
199 out1
[2] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
200 out1
[3] = __byte_perm_S (in1
[3], 0, 0x4321);
202 case 2: out0
[0] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
203 out0
[1] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
204 out0
[2] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
205 out0
[3] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
206 out1
[0] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
207 out1
[1] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
208 out1
[2] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
209 out1
[3] = __byte_perm_S (in1
[3], 0, 0x5432);
211 case 3: out0
[0] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
212 out0
[1] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
213 out0
[2] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
214 out0
[3] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
215 out1
[0] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
216 out1
[1] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
217 out1
[2] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
218 out1
[3] = __byte_perm_S (in1
[3], 0, 0x6543);
220 case 4: out0
[0] = in0
[1];
229 case 5: out0
[0] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
230 out0
[1] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
231 out0
[2] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
232 out0
[3] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
233 out1
[0] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
234 out1
[1] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
235 out1
[2] = __byte_perm_S (in1
[3], 0, 0x4321);
238 case 6: out0
[0] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
239 out0
[1] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
240 out0
[2] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
241 out0
[3] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
242 out1
[0] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
243 out1
[1] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
244 out1
[2] = __byte_perm_S (in1
[3], 0, 0x5432);
247 case 7: out0
[0] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
248 out0
[1] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
249 out0
[2] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
250 out0
[3] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
251 out1
[0] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
252 out1
[1] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
253 out1
[2] = __byte_perm_S (in1
[3], 0, 0x6543);
256 case 8: out0
[0] = in0
[2];
265 case 9: out0
[0] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
266 out0
[1] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
267 out0
[2] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
268 out0
[3] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
269 out1
[0] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
270 out1
[1] = __byte_perm_S (in1
[3], 0, 0x4321);
274 case 10: out0
[0] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
275 out0
[1] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
276 out0
[2] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
277 out0
[3] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
278 out1
[0] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
279 out1
[1] = __byte_perm_S (in1
[3], 0, 0x5432);
283 case 11: out0
[0] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
284 out0
[1] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
285 out0
[2] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
286 out0
[3] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
287 out1
[0] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
288 out1
[1] = __byte_perm_S (in1
[3], 0, 0x6543);
292 case 12: out0
[0] = in0
[3];
302 out0
[0] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
303 out0
[1] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
304 out0
[2] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
305 out0
[3] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
306 out1
[0] = __byte_perm_S (in1
[3], 0, 0x4321);
311 case 14: out0
[0] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
312 out0
[1] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
313 out0
[2] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
314 out0
[3] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
315 out1
[0] = __byte_perm_S (in1
[3], 0, 0x5432);
320 case 15: out0
[0] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
321 out0
[1] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
322 out0
[2] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
323 out0
[3] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
324 out1
[0] = __byte_perm_S (in1
[3], 0, 0x6543);
329 case 16: out0
[0] = in1
[0];
338 case 17: out0
[0] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
339 out0
[1] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
340 out0
[2] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
341 out0
[3] = __byte_perm_S (in1
[3], 0, 0x4321);
347 case 18: out0
[0] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
348 out0
[1] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
349 out0
[2] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
350 out0
[3] = __byte_perm_S (in1
[3], 0, 0x5432);
356 case 19: out0
[0] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
357 out0
[1] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
358 out0
[2] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
359 out0
[3] = __byte_perm_S (in1
[3], 0, 0x6543);
365 case 20: out0
[0] = in1
[1];
374 case 21: out0
[0] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
375 out0
[1] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
376 out0
[2] = __byte_perm_S (in1
[3], 0, 0x4321);
383 case 22: out0
[0] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
384 out0
[1] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
385 out0
[2] = __byte_perm_S (in1
[3], 0, 0x5432);
392 case 23: out0
[0] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
393 out0
[1] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
394 out0
[2] = __byte_perm_S (in1
[3], 0, 0x6543);
401 case 24: out0
[0] = in1
[2];
410 case 25: out0
[0] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
411 out0
[1] = __byte_perm_S (in1
[3], 0, 0x4321);
419 case 26: out0
[0] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
420 out0
[1] = __byte_perm_S (in1
[3], 0, 0x5432);
428 case 27: out0
[0] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
429 out0
[1] = __byte_perm_S (in1
[3], 0, 0x6543);
437 case 28: out0
[0] = in1
[3];
446 case 29: out0
[0] = __byte_perm_S (in1
[3], 0, 0x4321);
455 case 30: out0
[0] = __byte_perm_S (in1
[3], 0, 0x5432);
464 case 31: out0
[0] = __byte_perm_S (in1
[3], 0, 0x6543);
476 #if defined IS_AMD || defined IS_GENERIC
479 case 0: out0
[0] = in0
[0];
488 case 1: out0
[0] = amd_bytealign_S (in0
[1], in0
[0], 1);
489 out0
[1] = amd_bytealign_S (in0
[2], in0
[1], 1);
490 out0
[2] = amd_bytealign_S (in0
[3], in0
[2], 1);
491 out0
[3] = amd_bytealign_S (in1
[0], in0
[3], 1);
492 out1
[0] = amd_bytealign_S (in1
[1], in1
[0], 1);
493 out1
[1] = amd_bytealign_S (in1
[2], in1
[1], 1);
494 out1
[2] = amd_bytealign_S (in1
[3], in1
[2], 1);
495 out1
[3] = amd_bytealign_S ( 0, in1
[3], 1);
497 case 2: out0
[0] = amd_bytealign_S (in0
[1], in0
[0], 2);
498 out0
[1] = amd_bytealign_S (in0
[2], in0
[1], 2);
499 out0
[2] = amd_bytealign_S (in0
[3], in0
[2], 2);
500 out0
[3] = amd_bytealign_S (in1
[0], in0
[3], 2);
501 out1
[0] = amd_bytealign_S (in1
[1], in1
[0], 2);
502 out1
[1] = amd_bytealign_S (in1
[2], in1
[1], 2);
503 out1
[2] = amd_bytealign_S (in1
[3], in1
[2], 2);
504 out1
[3] = amd_bytealign_S ( 0, in1
[3], 2);
506 case 3: out0
[0] = amd_bytealign_S (in0
[1], in0
[0], 3);
507 out0
[1] = amd_bytealign_S (in0
[2], in0
[1], 3);
508 out0
[2] = amd_bytealign_S (in0
[3], in0
[2], 3);
509 out0
[3] = amd_bytealign_S (in1
[0], in0
[3], 3);
510 out1
[0] = amd_bytealign_S (in1
[1], in1
[0], 3);
511 out1
[1] = amd_bytealign_S (in1
[2], in1
[1], 3);
512 out1
[2] = amd_bytealign_S (in1
[3], in1
[2], 3);
513 out1
[3] = amd_bytealign_S ( 0, in1
[3], 3);
515 case 4: out0
[0] = in0
[1];
524 case 5: out0
[0] = amd_bytealign_S (in0
[2], in0
[1], 1);
525 out0
[1] = amd_bytealign_S (in0
[3], in0
[2], 1);
526 out0
[2] = amd_bytealign_S (in1
[0], in0
[3], 1);
527 out0
[3] = amd_bytealign_S (in1
[1], in1
[0], 1);
528 out1
[0] = amd_bytealign_S (in1
[2], in1
[1], 1);
529 out1
[1] = amd_bytealign_S (in1
[3], in1
[2], 1);
530 out1
[2] = amd_bytealign_S ( 0, in1
[3], 1);
533 case 6: out0
[0] = amd_bytealign_S (in0
[2], in0
[1], 2);
534 out0
[1] = amd_bytealign_S (in0
[3], in0
[2], 2);
535 out0
[2] = amd_bytealign_S (in1
[0], in0
[3], 2);
536 out0
[3] = amd_bytealign_S (in1
[1], in1
[0], 2);
537 out1
[0] = amd_bytealign_S (in1
[2], in1
[1], 2);
538 out1
[1] = amd_bytealign_S (in1
[3], in1
[2], 2);
539 out1
[2] = amd_bytealign_S ( 0, in1
[3], 2);
542 case 7: out0
[0] = amd_bytealign_S (in0
[2], in0
[1], 3);
543 out0
[1] = amd_bytealign_S (in0
[3], in0
[2], 3);
544 out0
[2] = amd_bytealign_S (in1
[0], in0
[3], 3);
545 out0
[3] = amd_bytealign_S (in1
[1], in1
[0], 3);
546 out1
[0] = amd_bytealign_S (in1
[2], in1
[1], 3);
547 out1
[1] = amd_bytealign_S (in1
[3], in1
[2], 3);
548 out1
[2] = amd_bytealign_S ( 0, in1
[3], 3);
551 case 8: out0
[0] = in0
[2];
560 case 9: out0
[0] = amd_bytealign_S (in0
[3], in0
[2], 1);
561 out0
[1] = amd_bytealign_S (in1
[0], in0
[3], 1);
562 out0
[2] = amd_bytealign_S (in1
[1], in1
[0], 1);
563 out0
[3] = amd_bytealign_S (in1
[2], in1
[1], 1);
564 out1
[0] = amd_bytealign_S (in1
[3], in1
[2], 1);
565 out1
[1] = amd_bytealign_S ( 0, in1
[3], 1);
569 case 10: out0
[0] = amd_bytealign_S (in0
[3], in0
[2], 2);
570 out0
[1] = amd_bytealign_S (in1
[0], in0
[3], 2);
571 out0
[2] = amd_bytealign_S (in1
[1], in1
[0], 2);
572 out0
[3] = amd_bytealign_S (in1
[2], in1
[1], 2);
573 out1
[0] = amd_bytealign_S (in1
[3], in1
[2], 2);
574 out1
[1] = amd_bytealign_S ( 0, in1
[3], 2);
578 case 11: out0
[0] = amd_bytealign_S (in0
[3], in0
[2], 3);
579 out0
[1] = amd_bytealign_S (in1
[0], in0
[3], 3);
580 out0
[2] = amd_bytealign_S (in1
[1], in1
[0], 3);
581 out0
[3] = amd_bytealign_S (in1
[2], in1
[1], 3);
582 out1
[0] = amd_bytealign_S (in1
[3], in1
[2], 3);
583 out1
[1] = amd_bytealign_S ( 0, in1
[3], 3);
587 case 12: out0
[0] = in0
[3];
596 case 13: out0
[0] = amd_bytealign_S (in1
[0], in0
[3], 1);
597 out0
[1] = amd_bytealign_S (in1
[1], in1
[0], 1);
598 out0
[2] = amd_bytealign_S (in1
[2], in1
[1], 1);
599 out0
[3] = amd_bytealign_S (in1
[3], in1
[2], 1);
600 out1
[0] = amd_bytealign_S ( 0, in1
[3], 1);
605 case 14: out0
[0] = amd_bytealign_S (in1
[0], in0
[3], 2);
606 out0
[1] = amd_bytealign_S (in1
[1], in1
[0], 2);
607 out0
[2] = amd_bytealign_S (in1
[2], in1
[1], 2);
608 out0
[3] = amd_bytealign_S (in1
[3], in1
[2], 2);
609 out1
[0] = amd_bytealign_S ( 0, in1
[3], 2);
614 case 15: out0
[0] = amd_bytealign_S (in1
[0], in0
[3], 3);
615 out0
[1] = amd_bytealign_S (in1
[1], in1
[0], 3);
616 out0
[2] = amd_bytealign_S (in1
[2], in1
[1], 3);
617 out0
[3] = amd_bytealign_S (in1
[3], in1
[2], 3);
618 out1
[0] = amd_bytealign_S ( 0, in1
[3], 3);
623 case 16: out0
[0] = in1
[0];
632 case 17: out0
[0] = amd_bytealign_S (in1
[1], in1
[0], 1);
633 out0
[1] = amd_bytealign_S (in1
[2], in1
[1], 1);
634 out0
[2] = amd_bytealign_S (in1
[3], in1
[2], 1);
635 out0
[3] = amd_bytealign_S ( 0, in1
[3], 1);
641 case 18: out0
[0] = amd_bytealign_S (in1
[1], in1
[0], 2);
642 out0
[1] = amd_bytealign_S (in1
[2], in1
[1], 2);
643 out0
[2] = amd_bytealign_S (in1
[3], in1
[2], 2);
644 out0
[3] = amd_bytealign_S ( 0, in1
[3], 2);
650 case 19: out0
[0] = amd_bytealign_S (in1
[1], in1
[0], 3);
651 out0
[1] = amd_bytealign_S (in1
[2], in1
[1], 3);
652 out0
[2] = amd_bytealign_S (in1
[3], in1
[2], 3);
653 out0
[3] = amd_bytealign_S ( 0, in1
[3], 3);
659 case 20: out0
[0] = in1
[1];
668 case 21: out0
[0] = amd_bytealign_S (in1
[2], in1
[1], 1);
669 out0
[1] = amd_bytealign_S (in1
[3], in1
[2], 1);
670 out0
[2] = amd_bytealign_S ( 0, in1
[3], 1);
677 case 22: out0
[0] = amd_bytealign_S (in1
[2], in1
[1], 2);
678 out0
[1] = amd_bytealign_S (in1
[3], in1
[2], 2);
679 out0
[2] = amd_bytealign_S ( 0, in1
[3], 2);
686 case 23: out0
[0] = amd_bytealign_S (in1
[2], in1
[1], 3);
687 out0
[1] = amd_bytealign_S (in1
[3], in1
[2], 3);
688 out0
[2] = amd_bytealign_S ( 0, in1
[3], 3);
695 case 24: out0
[0] = in1
[2];
704 case 25: out0
[0] = amd_bytealign_S (in1
[3], in1
[2], 1);
705 out0
[1] = amd_bytealign_S ( 0, in1
[3], 1);
713 case 26: out0
[0] = amd_bytealign_S (in1
[3], in1
[2], 2);
714 out0
[1] = amd_bytealign_S ( 0, in1
[3], 2);
722 case 27: out0
[0] = amd_bytealign_S (in1
[3], in1
[2], 3);
723 out0
[1] = amd_bytealign_S ( 0, in1
[3], 3);
731 case 28: out0
[0] = in1
[3];
740 case 29: out0
[0] = amd_bytealign_S ( 0, in1
[3], 1);
749 case 30: out0
[0] = amd_bytealign_S ( 0, in1
[3], 2);
758 case 31: out0
[0] = amd_bytealign_S ( 0, in1
[3], 3);
771 static void rshift_block_N (const u32 in0
[4], const u32 in1
[4], u32 out0
[4], u32 out1
[4], const u32 num
)
776 case 0: out1
[3] = in1
[3];
785 case 1: out1
[3] = __byte_perm_S (in1
[2], in1
[3], 0x6543);
786 out1
[2] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
787 out1
[1] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
788 out1
[0] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
789 out0
[3] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
790 out0
[2] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
791 out0
[1] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
792 out0
[0] = __byte_perm_S ( 0, in0
[0], 0x6543);
794 case 2: out1
[3] = __byte_perm_S (in1
[2], in1
[3], 0x5432);
795 out1
[2] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
796 out1
[1] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
797 out1
[0] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
798 out0
[3] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
799 out0
[2] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
800 out0
[1] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
801 out0
[0] = __byte_perm_S ( 0, in0
[0], 0x5432);
803 case 3: out1
[3] = __byte_perm_S (in1
[2], in1
[3], 0x4321);
804 out1
[2] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
805 out1
[1] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
806 out1
[0] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
807 out0
[3] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
808 out0
[2] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
809 out0
[1] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
810 out0
[0] = __byte_perm_S ( 0, in0
[0], 0x4321);
812 case 4: out1
[3] = in1
[2];
821 case 5: out1
[3] = __byte_perm_S (in1
[1], in1
[2], 0x6543);
822 out1
[2] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
823 out1
[1] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
824 out1
[0] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
825 out0
[3] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
826 out0
[2] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
827 out0
[1] = __byte_perm_S ( 0, in0
[0], 0x6543);
830 case 6: out1
[3] = __byte_perm_S (in1
[1], in1
[2], 0x5432);
831 out1
[2] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
832 out1
[1] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
833 out1
[0] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
834 out0
[3] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
835 out0
[2] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
836 out0
[1] = __byte_perm_S ( 0, in0
[0], 0x5432);
839 case 7: out1
[3] = __byte_perm_S (in1
[1], in1
[2], 0x4321);
840 out1
[2] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
841 out1
[1] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
842 out1
[0] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
843 out0
[3] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
844 out0
[2] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
845 out0
[1] = __byte_perm_S ( 0, in0
[0], 0x4321);
848 case 8: out1
[3] = in1
[1];
857 case 9: out1
[3] = __byte_perm_S (in1
[0], in1
[1], 0x6543);
858 out1
[2] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
859 out1
[1] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
860 out1
[0] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
861 out0
[3] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
862 out0
[2] = __byte_perm_S ( 0, in0
[0], 0x6543);
866 case 10: out1
[3] = __byte_perm_S (in1
[0], in1
[1], 0x5432);
867 out1
[2] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
868 out1
[1] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
869 out1
[0] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
870 out0
[3] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
871 out0
[2] = __byte_perm_S ( 0, in0
[0], 0x5432);
875 case 11: out1
[3] = __byte_perm_S (in1
[0], in1
[1], 0x4321);
876 out1
[2] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
877 out1
[1] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
878 out1
[0] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
879 out0
[3] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
880 out0
[2] = __byte_perm_S ( 0, in0
[0], 0x4321);
884 case 12: out1
[3] = in1
[0];
893 case 13: out1
[3] = __byte_perm_S (in0
[3], in1
[0], 0x6543);
894 out1
[2] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
895 out1
[1] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
896 out1
[0] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
897 out0
[3] = __byte_perm_S ( 0, in0
[0], 0x6543);
902 case 14: out1
[3] = __byte_perm_S (in0
[3], in1
[0], 0x5432);
903 out1
[2] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
904 out1
[1] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
905 out1
[0] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
906 out0
[3] = __byte_perm_S ( 0, in0
[0], 0x5432);
911 case 15: out1
[3] = __byte_perm_S (in0
[3], in1
[0], 0x4321);
912 out1
[2] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
913 out1
[1] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
914 out1
[0] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
915 out0
[3] = __byte_perm_S ( 0, in0
[0], 0x4321);
920 case 16: out1
[3] = in0
[3];
929 case 17: out1
[3] = __byte_perm_S (in0
[2], in0
[3], 0x6543);
930 out1
[2] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
931 out1
[1] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
932 out1
[0] = __byte_perm_S ( 0, in0
[0], 0x6543);
938 case 18: out1
[3] = __byte_perm_S (in0
[2], in0
[3], 0x5432);
939 out1
[2] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
940 out1
[1] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
941 out1
[0] = __byte_perm_S ( 0, in0
[0], 0x5432);
947 case 19: out1
[3] = __byte_perm_S (in0
[2], in0
[3], 0x4321);
948 out1
[2] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
949 out1
[1] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
950 out1
[0] = __byte_perm_S ( 0, in0
[0], 0x4321);
956 case 20: out1
[3] = in0
[2];
965 case 21: out1
[3] = __byte_perm_S (in0
[1], in0
[2], 0x6543);
966 out1
[2] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
967 out1
[1] = __byte_perm_S ( 0, in0
[0], 0x6543);
974 case 22: out1
[3] = __byte_perm_S (in0
[1], in0
[2], 0x5432);
975 out1
[2] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
976 out1
[1] = __byte_perm_S ( 0, in0
[0], 0x5432);
983 case 23: out1
[3] = __byte_perm_S (in0
[1], in0
[2], 0x4321);
984 out1
[2] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
985 out1
[1] = __byte_perm_S ( 0, in0
[0], 0x4321);
992 case 24: out1
[3] = in0
[1];
1001 case 25: out1
[3] = __byte_perm_S (in0
[0], in0
[1], 0x6543);
1002 out1
[2] = __byte_perm_S ( 0, in0
[0], 0x6543);
1010 case 26: out1
[3] = __byte_perm_S (in0
[0], in0
[1], 0x5432);
1011 out1
[2] = __byte_perm_S ( 0, in0
[0], 0x5432);
1019 case 27: out1
[3] = __byte_perm_S (in0
[0], in0
[1], 0x4321);
1020 out1
[2] = __byte_perm_S ( 0, in0
[0], 0x4321);
1028 case 28: out1
[3] = in0
[0];
1037 case 29: out1
[3] = __byte_perm_S ( 0, in0
[0], 0x6543);
1046 case 30: out1
[3] = __byte_perm_S ( 0, in0
[0], 0x5432);
1055 case 31: out1
[3] = __byte_perm_S ( 0, in0
[0], 0x4321);
1067 #if defined IS_AMD || defined IS_GENERIC
1070 case 0: out1
[3] = in1
[3];
1079 case 1: out1
[3] = amd_bytealign_S (in1
[3], in1
[2], 3);
1080 out1
[2] = amd_bytealign_S (in1
[2], in1
[1], 3);
1081 out1
[1] = amd_bytealign_S (in1
[1], in1
[0], 3);
1082 out1
[0] = amd_bytealign_S (in1
[0], in0
[3], 3);
1083 out0
[3] = amd_bytealign_S (in0
[3], in0
[2], 3);
1084 out0
[2] = amd_bytealign_S (in0
[2], in0
[1], 3);
1085 out0
[1] = amd_bytealign_S (in0
[1], in0
[0], 3);
1086 out0
[0] = amd_bytealign_S (in0
[0], 0, 3);
1088 case 2: out1
[3] = amd_bytealign_S (in1
[3], in1
[2], 2);
1089 out1
[2] = amd_bytealign_S (in1
[2], in1
[1], 2);
1090 out1
[1] = amd_bytealign_S (in1
[1], in1
[0], 2);
1091 out1
[0] = amd_bytealign_S (in1
[0], in0
[3], 2);
1092 out0
[3] = amd_bytealign_S (in0
[3], in0
[2], 2);
1093 out0
[2] = amd_bytealign_S (in0
[2], in0
[1], 2);
1094 out0
[1] = amd_bytealign_S (in0
[1], in0
[0], 2);
1095 out0
[0] = amd_bytealign_S (in0
[0], 0, 2);
1097 case 3: out1
[3] = amd_bytealign_S (in1
[3], in1
[2], 1);
1098 out1
[2] = amd_bytealign_S (in1
[2], in1
[1], 1);
1099 out1
[1] = amd_bytealign_S (in1
[1], in1
[0], 1);
1100 out1
[0] = amd_bytealign_S (in1
[0], in0
[3], 1);
1101 out0
[3] = amd_bytealign_S (in0
[3], in0
[2], 1);
1102 out0
[2] = amd_bytealign_S (in0
[2], in0
[1], 1);
1103 out0
[1] = amd_bytealign_S (in0
[1], in0
[0], 1);
1104 out0
[0] = amd_bytealign_S (in0
[0], 0, 1);
1106 case 4: out1
[3] = in1
[2];
1115 case 5: out1
[3] = amd_bytealign_S (in1
[2], in1
[1], 3);
1116 out1
[2] = amd_bytealign_S (in1
[1], in1
[0], 3);
1117 out1
[1] = amd_bytealign_S (in1
[0], in0
[3], 3);
1118 out1
[0] = amd_bytealign_S (in0
[3], in0
[2], 3);
1119 out0
[3] = amd_bytealign_S (in0
[2], in0
[1], 3);
1120 out0
[2] = amd_bytealign_S (in0
[1], in0
[0], 3);
1121 out0
[1] = amd_bytealign_S (in0
[0], 0, 3);
1124 case 6: out1
[3] = amd_bytealign_S (in1
[2], in1
[1], 2);
1125 out1
[2] = amd_bytealign_S (in1
[1], in1
[0], 2);
1126 out1
[1] = amd_bytealign_S (in1
[0], in0
[3], 2);
1127 out1
[0] = amd_bytealign_S (in0
[3], in0
[2], 2);
1128 out0
[3] = amd_bytealign_S (in0
[2], in0
[1], 2);
1129 out0
[2] = amd_bytealign_S (in0
[1], in0
[0], 2);
1130 out0
[1] = amd_bytealign_S (in0
[0], 0, 2);
1133 case 7: out1
[3] = amd_bytealign_S (in1
[2], in1
[1], 1);
1134 out1
[2] = amd_bytealign_S (in1
[1], in1
[0], 1);
1135 out1
[1] = amd_bytealign_S (in1
[0], in0
[3], 1);
1136 out1
[0] = amd_bytealign_S (in0
[3], in0
[2], 1);
1137 out0
[3] = amd_bytealign_S (in0
[2], in0
[1], 1);
1138 out0
[2] = amd_bytealign_S (in0
[1], in0
[0], 1);
1139 out0
[1] = amd_bytealign_S (in0
[0], 0, 1);
1142 case 8: out1
[3] = in1
[1];
1151 case 9: out1
[3] = amd_bytealign_S (in1
[1], in1
[0], 3);
1152 out1
[2] = amd_bytealign_S (in1
[0], in0
[3], 3);
1153 out1
[1] = amd_bytealign_S (in0
[3], in0
[2], 3);
1154 out1
[0] = amd_bytealign_S (in0
[2], in0
[1], 3);
1155 out0
[3] = amd_bytealign_S (in0
[1], in0
[0], 3);
1156 out0
[2] = amd_bytealign_S (in0
[0], 0, 3);
1160 case 10: out1
[3] = amd_bytealign_S (in1
[1], in1
[0], 2);
1161 out1
[2] = amd_bytealign_S (in1
[0], in0
[3], 2);
1162 out1
[1] = amd_bytealign_S (in0
[3], in0
[2], 2);
1163 out1
[0] = amd_bytealign_S (in0
[2], in0
[1], 2);
1164 out0
[3] = amd_bytealign_S (in0
[1], in0
[0], 2);
1165 out0
[2] = amd_bytealign_S (in0
[0], 0, 2);
1169 case 11: out1
[3] = amd_bytealign_S (in1
[1], in1
[0], 1);
1170 out1
[2] = amd_bytealign_S (in1
[0], in0
[3], 1);
1171 out1
[1] = amd_bytealign_S (in0
[3], in0
[2], 1);
1172 out1
[0] = amd_bytealign_S (in0
[2], in0
[1], 1);
1173 out0
[3] = amd_bytealign_S (in0
[1], in0
[0], 1);
1174 out0
[2] = amd_bytealign_S (in0
[0], 0, 1);
1178 case 12: out1
[3] = in1
[0];
1187 case 13: out1
[3] = amd_bytealign_S (in1
[0], in0
[3], 3);
1188 out1
[2] = amd_bytealign_S (in0
[3], in0
[2], 3);
1189 out1
[1] = amd_bytealign_S (in0
[2], in0
[1], 3);
1190 out1
[0] = amd_bytealign_S (in0
[1], in0
[0], 3);
1191 out0
[3] = amd_bytealign_S (in0
[0], 0, 3);
1196 case 14: out1
[3] = amd_bytealign_S (in1
[0], in0
[3], 2);
1197 out1
[2] = amd_bytealign_S (in0
[3], in0
[2], 2);
1198 out1
[1] = amd_bytealign_S (in0
[2], in0
[1], 2);
1199 out1
[0] = amd_bytealign_S (in0
[1], in0
[0], 2);
1200 out0
[3] = amd_bytealign_S (in0
[0], 0, 2);
1205 case 15: out1
[3] = amd_bytealign_S (in1
[0], in0
[3], 1);
1206 out1
[2] = amd_bytealign_S (in0
[3], in0
[2], 1);
1207 out1
[1] = amd_bytealign_S (in0
[2], in0
[1], 1);
1208 out1
[0] = amd_bytealign_S (in0
[1], in0
[0], 1);
1209 out0
[3] = amd_bytealign_S (in0
[0], 0, 1);
1214 case 16: out1
[3] = in0
[3];
1223 case 17: out1
[3] = amd_bytealign_S (in0
[3], in0
[2], 3);
1224 out1
[2] = amd_bytealign_S (in0
[2], in0
[1], 3);
1225 out1
[1] = amd_bytealign_S (in0
[1], in0
[0], 3);
1226 out1
[0] = amd_bytealign_S (in0
[0], 0, 3);
1232 case 18: out1
[3] = amd_bytealign_S (in0
[3], in0
[2], 2);
1233 out1
[2] = amd_bytealign_S (in0
[2], in0
[1], 2);
1234 out1
[1] = amd_bytealign_S (in0
[1], in0
[0], 2);
1235 out1
[0] = amd_bytealign_S (in0
[0], 0, 2);
1241 case 19: out1
[3] = amd_bytealign_S (in0
[3], in0
[2], 1);
1242 out1
[2] = amd_bytealign_S (in0
[2], in0
[1], 1);
1243 out1
[1] = amd_bytealign_S (in0
[1], in0
[0], 1);
1244 out1
[0] = amd_bytealign_S (in0
[0], 0, 1);
1250 case 20: out1
[3] = in0
[2];
1259 case 21: out1
[3] = amd_bytealign_S (in0
[2], in0
[1], 3);
1260 out1
[2] = amd_bytealign_S (in0
[1], in0
[0], 3);
1261 out1
[1] = amd_bytealign_S (in0
[0], 0, 3);
1268 case 22: out1
[3] = amd_bytealign_S (in0
[2], in0
[1], 2);
1269 out1
[2] = amd_bytealign_S (in0
[1], in0
[0], 2);
1270 out1
[1] = amd_bytealign_S (in0
[0], 0, 2);
1277 case 23: out1
[3] = amd_bytealign_S (in0
[2], in0
[1], 1);
1278 out1
[2] = amd_bytealign_S (in0
[1], in0
[0], 1);
1279 out1
[1] = amd_bytealign_S (in0
[0], 0, 1);
1286 case 24: out1
[3] = in0
[1];
1295 case 25: out1
[3] = amd_bytealign_S (in0
[1], in0
[0], 3);
1296 out1
[2] = amd_bytealign_S (in0
[0], 0, 3);
1304 case 26: out1
[3] = amd_bytealign_S (in0
[1], in0
[0], 2);
1305 out1
[2] = amd_bytealign_S (in0
[0], 0, 2);
1313 case 27: out1
[3] = amd_bytealign_S (in0
[1], in0
[0], 1);
1314 out1
[2] = amd_bytealign_S (in0
[0], 0, 1);
1322 case 28: out1
[3] = in0
[0];
1331 case 29: out1
[3] = amd_bytealign_S (in0
[0], 0, 3);
1340 case 30: out1
[3] = amd_bytealign_S (in0
[0], 0, 2);
1349 case 31: out1
[3] = amd_bytealign_S (in0
[0], 0, 1);
1362 static void append_block1 (const u32 offset
, u32 dst0
[4], u32 dst1
[4], const u32 src_r0
)
1368 case 0: tmp
[0] = src_r0
;
1371 case 1: tmp
[0] = src_r0
<< 8;
1372 tmp
[1] = src_r0
>> 24;
1374 case 2: tmp
[0] = src_r0
<< 16;
1375 tmp
[1] = src_r0
>> 16;
1377 case 3: tmp
[0] = src_r0
<< 24;
1378 tmp
[1] = src_r0
>> 8;
1384 case 0: dst0
[0] |= tmp
[0];
1387 case 1: dst0
[1] |= tmp
[0];
1390 case 2: dst0
[2] |= tmp
[0];
1393 case 3: dst0
[3] |= tmp
[0];
1396 case 4: dst1
[0] |= tmp
[0];
1399 case 5: dst1
[1] |= tmp
[0];
1402 case 6: dst1
[2] |= tmp
[0];
1405 case 7: dst1
[3] |= tmp
[0];
1410 static void append_block8 (const u32 offset
, u32 dst0
[4], u32 dst1
[4], const u32 src_l0
[4], const u32 src_l1
[4], const u32 src_r0
[4], const u32 src_r1
[4])
1416 dst0
[0] = src_r0
[0];
1417 dst0
[1] = src_r0
[1];
1418 dst0
[2] = src_r0
[2];
1419 dst0
[3] = src_r0
[3];
1420 dst1
[0] = src_r1
[0];
1421 dst1
[1] = src_r1
[1];
1422 dst1
[2] = src_r1
[2];
1423 dst1
[3] = src_r1
[3];
1427 dst0
[0] = __byte_perm_S (src_l0
[0], src_r0
[0], 0x6540);
1428 dst0
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1429 dst0
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1430 dst0
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1431 dst1
[0] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1432 dst1
[1] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x6543);
1433 dst1
[2] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x6543);
1434 dst1
[3] = __byte_perm_S (src_r1
[2], src_r1
[3], 0x6543);
1438 dst0
[0] = __byte_perm_S (src_l0
[0], src_r0
[0], 0x5410);
1439 dst0
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1440 dst0
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1441 dst0
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1442 dst1
[0] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1443 dst1
[1] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x5432);
1444 dst1
[2] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x5432);
1445 dst1
[3] = __byte_perm_S (src_r1
[2], src_r1
[3], 0x5432);
1449 dst0
[0] = __byte_perm_S (src_l0
[0], src_r0
[0], 0x4210);
1450 dst0
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1451 dst0
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1452 dst0
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1453 dst1
[0] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1454 dst1
[1] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x4321);
1455 dst1
[2] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x4321);
1456 dst1
[3] = __byte_perm_S (src_r1
[2], src_r1
[3], 0x4321);
1460 dst0
[1] = src_r0
[0];
1461 dst0
[2] = src_r0
[1];
1462 dst0
[3] = src_r0
[2];
1463 dst1
[0] = src_r0
[3];
1464 dst1
[1] = src_r1
[0];
1465 dst1
[2] = src_r1
[1];
1466 dst1
[3] = src_r1
[2];
1470 dst0
[1] = __byte_perm_S (src_l0
[1], src_r0
[0], 0x6540);
1471 dst0
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1472 dst0
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1473 dst1
[0] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1474 dst1
[1] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1475 dst1
[2] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x6543);
1476 dst1
[3] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x6543);
1480 dst0
[1] = __byte_perm_S (src_l0
[1], src_r0
[0], 0x5410);
1481 dst0
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1482 dst0
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1483 dst1
[0] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1484 dst1
[1] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1485 dst1
[2] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x5432);
1486 dst1
[3] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x5432);
1490 dst0
[1] = __byte_perm_S (src_l0
[1], src_r0
[0], 0x4210);
1491 dst0
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1492 dst0
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1493 dst1
[0] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1494 dst1
[1] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1495 dst1
[2] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x4321);
1496 dst1
[3] = __byte_perm_S (src_r1
[1], src_r1
[2], 0x4321);
1500 dst0
[2] = src_r0
[0];
1501 dst0
[3] = src_r0
[1];
1502 dst1
[0] = src_r0
[2];
1503 dst1
[1] = src_r0
[3];
1504 dst1
[2] = src_r1
[0];
1505 dst1
[3] = src_r1
[1];
1509 dst0
[2] = __byte_perm_S (src_l0
[2], src_r0
[0], 0x6540);
1510 dst0
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1511 dst1
[0] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1512 dst1
[1] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1513 dst1
[2] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1514 dst1
[3] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x6543);
1518 dst0
[2] = __byte_perm_S (src_l0
[2], src_r0
[0], 0x5410);
1519 dst0
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1520 dst1
[0] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1521 dst1
[1] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1522 dst1
[2] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1523 dst1
[3] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x5432);
1527 dst0
[2] = __byte_perm_S (src_l0
[2], src_r0
[0], 0x4210);
1528 dst0
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1529 dst1
[0] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1530 dst1
[1] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1531 dst1
[2] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1532 dst1
[3] = __byte_perm_S (src_r1
[0], src_r1
[1], 0x4321);
1536 dst0
[3] = src_r0
[0];
1537 dst1
[0] = src_r0
[1];
1538 dst1
[1] = src_r0
[2];
1539 dst1
[2] = src_r0
[3];
1540 dst1
[3] = src_r1
[0];
1544 dst0
[3] = __byte_perm_S (src_l0
[3], src_r0
[0], 0x6540);
1545 dst1
[0] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1546 dst1
[1] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1547 dst1
[2] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1548 dst1
[3] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x6543);
1552 dst0
[3] = __byte_perm_S (src_l0
[3], src_r0
[0], 0x5410);
1553 dst1
[0] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1554 dst1
[1] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1555 dst1
[2] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1556 dst1
[3] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x5432);
1560 dst0
[3] = __byte_perm_S (src_l0
[3], src_r0
[0], 0x4210);
1561 dst1
[0] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1562 dst1
[1] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1563 dst1
[2] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1564 dst1
[3] = __byte_perm_S (src_r0
[3], src_r1
[0], 0x4321);
1568 dst1
[0] = src_r0
[0];
1569 dst1
[1] = src_r0
[1];
1570 dst1
[2] = src_r0
[2];
1571 dst1
[3] = src_r0
[3];
1575 dst1
[0] = __byte_perm_S (src_l1
[0], src_r0
[0], 0x6540);
1576 dst1
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1577 dst1
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1578 dst1
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x6543);
1582 dst1
[0] = __byte_perm_S (src_l1
[0], src_r0
[0], 0x5410);
1583 dst1
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1584 dst1
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1585 dst1
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x5432);
1589 dst1
[0] = __byte_perm_S (src_l1
[0], src_r0
[0], 0x4210);
1590 dst1
[1] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1591 dst1
[2] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1592 dst1
[3] = __byte_perm_S (src_r0
[2], src_r0
[3], 0x4321);
1596 dst1
[1] = src_r0
[0];
1597 dst1
[2] = src_r0
[1];
1598 dst1
[3] = src_r0
[2];
1602 dst1
[1] = __byte_perm_S (src_l1
[1], src_r0
[0], 0x6540);
1603 dst1
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1604 dst1
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x6543);
1608 dst1
[1] = __byte_perm_S (src_l1
[1], src_r0
[0], 0x5410);
1609 dst1
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1610 dst1
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x5432);
1614 dst1
[1] = __byte_perm_S (src_l1
[1], src_r0
[0], 0x4210);
1615 dst1
[2] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1616 dst1
[3] = __byte_perm_S (src_r0
[1], src_r0
[2], 0x4321);
1620 dst1
[2] = src_r0
[0];
1621 dst1
[3] = src_r0
[1];
1625 dst1
[2] = __byte_perm_S (src_l1
[2], src_r0
[0], 0x6540);
1626 dst1
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x6543);
1630 dst1
[2] = __byte_perm_S (src_l1
[2], src_r0
[0], 0x5410);
1631 dst1
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x5432);
1635 dst1
[2] = __byte_perm_S (src_l1
[2], src_r0
[0], 0x4210);
1636 dst1
[3] = __byte_perm_S (src_r0
[0], src_r0
[1], 0x4321);
1640 dst1
[3] = src_r0
[0];
1644 dst1
[3] = __byte_perm_S (src_l1
[3], src_r0
[0], 0x6540);
1648 dst1
[3] = __byte_perm_S (src_l1
[3], src_r0
[0], 0x5410);
1652 dst1
[3] = __byte_perm_S (src_l1
[3], src_r0
[0], 0x4210);
1657 #if defined IS_AMD || defined IS_GENERIC
1661 dst1
[3] = src_l1
[3] | src_r0
[0] << 24;
1664 dst1
[3] = src_l1
[3] | src_r0
[0] << 16;
1667 dst1
[3] = src_l1
[3] | src_r0
[0] << 8;
1670 dst1
[3] = src_r0
[0];
1673 dst1
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1674 dst1
[2] = src_l1
[2] | src_r0
[0] << 24;
1677 dst1
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1678 dst1
[2] = src_l1
[2] | src_r0
[0] << 16;
1681 dst1
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1682 dst1
[2] = src_l1
[2] | src_r0
[0] << 8;
1685 dst1
[3] = src_r0
[1];
1686 dst1
[2] = src_r0
[0];
1689 dst1
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1690 dst1
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1691 dst1
[1] = src_l1
[1] | src_r0
[0] << 24;
1694 dst1
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1695 dst1
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1696 dst1
[1] = src_l1
[1] | src_r0
[0] << 16;
1699 dst1
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1700 dst1
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1701 dst1
[1] = src_l1
[1] | src_r0
[0] << 8;
1704 dst1
[3] = src_r0
[2];
1705 dst1
[2] = src_r0
[1];
1706 dst1
[1] = src_r0
[0];
1709 dst1
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1710 dst1
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1711 dst1
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1712 dst1
[0] = src_l1
[0] | src_r0
[0] << 24;
1715 dst1
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1716 dst1
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1717 dst1
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1718 dst1
[0] = src_l1
[0] | src_r0
[0] << 16;
1721 dst1
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1722 dst1
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1723 dst1
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1724 dst1
[0] = src_l1
[0] | src_r0
[0] << 8;
1727 dst1
[3] = src_r0
[3];
1728 dst1
[2] = src_r0
[2];
1729 dst1
[1] = src_r0
[1];
1730 dst1
[0] = src_r0
[0];
1733 dst1
[3] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1734 dst1
[2] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1735 dst1
[1] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1736 dst1
[0] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1737 dst0
[3] = src_l0
[3] | src_r0
[0] << 24;
1740 dst1
[3] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1741 dst1
[2] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1742 dst1
[1] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1743 dst1
[0] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1744 dst0
[3] = src_l0
[3] | src_r0
[0] << 16;
1747 dst1
[3] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1748 dst1
[2] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1749 dst1
[1] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1750 dst1
[0] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1751 dst0
[3] = src_l0
[3] | src_r0
[0] << 8;
1754 dst1
[3] = src_r1
[0];
1755 dst1
[2] = src_r0
[3];
1756 dst1
[1] = src_r0
[2];
1757 dst1
[0] = src_r0
[1];
1758 dst0
[3] = src_r0
[0];
1761 dst1
[3] = amd_bytealign_S (src_r1
[1], src_r1
[0], 1);
1762 dst1
[2] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1763 dst1
[1] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1764 dst1
[0] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1765 dst0
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1766 dst0
[2] = src_l0
[2] | src_r0
[0] << 24;
1769 dst1
[3] = amd_bytealign_S (src_r1
[1], src_r1
[0], 2);
1770 dst1
[2] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1771 dst1
[1] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1772 dst1
[0] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1773 dst0
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1774 dst0
[2] = src_l0
[2] | src_r0
[0] << 16;
1777 dst1
[3] = amd_bytealign_S (src_r1
[1], src_r1
[0], 3);
1778 dst1
[2] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1779 dst1
[1] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1780 dst1
[0] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1781 dst0
[3] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1782 dst0
[2] = src_l0
[2] | src_r0
[0] << 8;
1785 dst1
[3] = src_r1
[1];
1786 dst1
[2] = src_r1
[0];
1787 dst1
[1] = src_r0
[3];
1788 dst1
[0] = src_r0
[2];
1789 dst0
[3] = src_r0
[1];
1790 dst0
[2] = src_r0
[0];
1793 dst1
[3] = amd_bytealign_S (src_r1
[2], src_r1
[1], 1);
1794 dst1
[2] = amd_bytealign_S (src_r1
[1], src_r1
[0], 1);
1795 dst1
[1] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1796 dst1
[0] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1797 dst0
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1798 dst0
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1799 dst0
[1] = src_l0
[1] | src_r0
[0] << 24;
1802 dst1
[3] = amd_bytealign_S (src_r1
[2], src_r1
[1], 2);
1803 dst1
[2] = amd_bytealign_S (src_r1
[1], src_r1
[0], 2);
1804 dst1
[1] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1805 dst1
[0] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1806 dst0
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1807 dst0
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1808 dst0
[1] = src_l0
[1] | src_r0
[0] << 16;
1811 dst1
[3] = amd_bytealign_S (src_r1
[2], src_r1
[1], 3);
1812 dst1
[2] = amd_bytealign_S (src_r1
[1], src_r1
[0], 3);
1813 dst1
[1] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1814 dst1
[0] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1815 dst0
[3] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1816 dst0
[2] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1817 dst0
[1] = src_l0
[1] | src_r0
[0] << 8;
1820 dst1
[3] = src_r1
[2];
1821 dst1
[2] = src_r1
[1];
1822 dst1
[1] = src_r1
[0];
1823 dst1
[0] = src_r0
[3];
1824 dst0
[3] = src_r0
[2];
1825 dst0
[2] = src_r0
[1];
1826 dst0
[1] = src_r0
[0];
1829 dst1
[3] = amd_bytealign_S (src_r1
[3], src_r1
[2], 1);
1830 dst1
[2] = amd_bytealign_S (src_r1
[2], src_r1
[1], 1);
1831 dst1
[1] = amd_bytealign_S (src_r1
[1], src_r1
[0], 1);
1832 dst1
[0] = amd_bytealign_S (src_r1
[0], src_r0
[3], 1);
1833 dst0
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 1);
1834 dst0
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 1);
1835 dst0
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 1);
1836 dst0
[0] = src_l0
[0] | src_r0
[0] << 24;
1839 dst1
[3] = amd_bytealign_S (src_r1
[3], src_r1
[2], 2);
1840 dst1
[2] = amd_bytealign_S (src_r1
[2], src_r1
[1], 2);
1841 dst1
[1] = amd_bytealign_S (src_r1
[1], src_r1
[0], 2);
1842 dst1
[0] = amd_bytealign_S (src_r1
[0], src_r0
[3], 2);
1843 dst0
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 2);
1844 dst0
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 2);
1845 dst0
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 2);
1846 dst0
[0] = src_l0
[0] | src_r0
[0] << 16;
1849 dst1
[3] = amd_bytealign_S (src_r1
[3], src_r1
[2], 3);
1850 dst1
[2] = amd_bytealign_S (src_r1
[2], src_r1
[1], 3);
1851 dst1
[1] = amd_bytealign_S (src_r1
[1], src_r1
[0], 3);
1852 dst1
[0] = amd_bytealign_S (src_r1
[0], src_r0
[3], 3);
1853 dst0
[3] = amd_bytealign_S (src_r0
[3], src_r0
[2], 3);
1854 dst0
[2] = amd_bytealign_S (src_r0
[2], src_r0
[1], 3);
1855 dst0
[1] = amd_bytealign_S (src_r0
[1], src_r0
[0], 3);
1856 dst0
[0] = src_l0
[0] | src_r0
[0] << 8;
1859 dst1
[3] = src_r1
[3];
1860 dst1
[2] = src_r1
[2];
1861 dst1
[1] = src_r1
[1];
1862 dst1
[0] = src_r1
[0];
1863 dst0
[3] = src_r0
[3];
1864 dst0
[2] = src_r0
[2];
1865 dst0
[1] = src_r0
[1];
1866 dst0
[0] = src_r0
[0];
/*
 * reverse_block: fills out0/out1 from in0/in1 for a string of `len` bytes.
 *
 * Visible steps:
 *   1. rshift_block_N () is called with a shift of (32 - len), writing its
 *      result into out0/out1.
 *   2. Every output word is then overwritten with swap32_S () of the
 *      matching tib40/tib41 word.
 *
 * NOTE(review): the declaration and filling of tib40/tib41 are not present
 * in this listing (the embedded numbering jumps from 1874 to 1888).
 * Presumably they hold the shifted words in reverse word order, so that the
 * per-word byte swap completes a byte-wise reversal -- confirm against the
 * full source.
 */
1872 static void reverse_block (u32 in0
[4], u32 in1
[4], u32 out0
[4], u32 out1
[4], const u32 len
)
1874 rshift_block_N (in0
, in1
, out0
, out1
, 32 - len
);
/* byte-swap each temporary word into the output */
1888 out0
[0] = swap32_S (tib40
[0]);
1889 out0
[1] = swap32_S (tib40
[1]);
1890 out0
[2] = swap32_S (tib40
[2]);
1891 out0
[3] = swap32_S (tib40
[3]);
1892 out1
[0] = swap32_S (tib41
[0]);
1893 out1
[1] = swap32_S (tib41
[1]);
1894 out1
[2] = swap32_S (tib41
[2]);
1895 out1
[3] = swap32_S (tib41
[3]);
/*
 * rule_op_mangle_lrest: lowercases every ASCII letter in the 32-byte buffer
 * held in buf0[0..3] / buf1[0..3].
 *
 * generate_cmask () yields 0x20 in each byte lane that holds an ASCII letter
 * (A-Z or a-z) and 0 elsewhere; OR-ing that mask into the word sets bit 5 of
 * each letter, which turns uppercase into lowercase and leaves everything
 * else untouched.
 *
 * p0, p1 and in_len are not used by the visible statements.
 * NOTE(review): the return statement is not present in this listing;
 * presumably the length is returned unchanged -- confirm.
 */
1898 static u32
rule_op_mangle_lrest (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1900 buf0
[0] |= (generate_cmask (buf0
[0]));
1901 buf0
[1] |= (generate_cmask (buf0
[1]));
1902 buf0
[2] |= (generate_cmask (buf0
[2]));
1903 buf0
[3] |= (generate_cmask (buf0
[3]));
1904 buf1
[0] |= (generate_cmask (buf1
[0]));
1905 buf1
[1] |= (generate_cmask (buf1
[1]));
1906 buf1
[2] |= (generate_cmask (buf1
[2]));
1907 buf1
[3] |= (generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_urest: uppercases every ASCII letter in the 32-byte buffer
 * held in buf0[0..3] / buf1[0..3].
 *
 * generate_cmask () yields 0x20 in each byte lane that holds an ASCII letter;
 * AND-ing with the complement clears bit 5 of each letter, which turns
 * lowercase into uppercase and leaves non-letters untouched.
 *
 * p0, p1 and in_len are not used by the visible statements.
 * NOTE(review): the return statement is not present in this listing;
 * presumably the length is returned unchanged -- confirm.
 */
1912 static u32
rule_op_mangle_urest (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1914 buf0
[0] &= ~(generate_cmask (buf0
[0]));
1915 buf0
[1] &= ~(generate_cmask (buf0
[1]));
1916 buf0
[2] &= ~(generate_cmask (buf0
[2]));
1917 buf0
[3] &= ~(generate_cmask (buf0
[3]));
1918 buf1
[0] &= ~(generate_cmask (buf1
[0]));
1919 buf1
[1] &= ~(generate_cmask (buf1
[1]));
1920 buf1
[2] &= ~(generate_cmask (buf1
[2]));
1921 buf1
[3] &= ~(generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_lrest_ufirst: lowercases the whole buffer, then uppercases
 * its first character.
 *
 * The second statement restricts the letter mask to 0x00000020, i.e. bit 5
 * of the lowest byte of buf0[0], and clears it -- so only the first
 * character is affected, and only if it is an ASCII letter.
 *
 * NOTE(review): the return statement is not present in this listing.
 */
1926 static u32
rule_op_mangle_lrest_ufirst (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1928 rule_op_mangle_lrest (p0
, p1
, buf0
, buf1
, in_len
);
1930 buf0
[0] &= ~(0x00000020 & generate_cmask (buf0
[0]));
/*
 * rule_op_mangle_urest_lfirst: uppercases the whole buffer, then lowercases
 * its first character.
 *
 * The second statement restricts the letter mask to 0x00000020, i.e. bit 5
 * of the lowest byte of buf0[0], and sets it -- so only the first character
 * is affected, and only if it is an ASCII letter.
 *
 * NOTE(review): the return statement is not present in this listing.
 */
1935 static u32
rule_op_mangle_urest_lfirst (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1937 rule_op_mangle_urest (p0
, p1
, buf0
, buf1
, in_len
);
1939 buf0
[0] |= (0x00000020 & generate_cmask (buf0
[0]));
/*
 * rule_op_mangle_trest: toggles the case of every ASCII letter in the
 * 32-byte buffer held in buf0[0..3] / buf1[0..3].
 *
 * generate_cmask () yields 0x20 in each byte lane that holds an ASCII letter;
 * XOR-ing that mask into the word flips bit 5 of each letter and leaves
 * non-letters untouched.
 *
 * p0, p1 and in_len are not used by the visible statements.
 * NOTE(review): the return statement is not present in this listing.
 */
1944 static u32
rule_op_mangle_trest (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1946 buf0
[0] ^= (generate_cmask (buf0
[0]));
1947 buf0
[1] ^= (generate_cmask (buf0
[1]));
1948 buf0
[2] ^= (generate_cmask (buf0
[2]));
1949 buf0
[3] ^= (generate_cmask (buf0
[3]));
1950 buf1
[0] ^= (generate_cmask (buf1
[0]));
1951 buf1
[1] ^= (generate_cmask (buf1
[1]));
1952 buf1
[2] ^= (generate_cmask (buf1
[2]));
1953 buf1
[3] ^= (generate_cmask (buf1
[3]));
/*
 * rule_op_mangle_toggle_at: toggles the case of the character at byte
 * position p0, if that character is an ASCII letter.
 *
 * Returns in_len unchanged when p0 is not inside the string.
 *
 * NOTE(review): the switch header selecting the word is not present in this
 * listing; the eight cases match a selector of p0 / 4 -- confirm. The final
 * return statement is not visible either.
 */
1958 static u32
rule_op_mangle_toggle_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1960 if (p0
>= in_len
) return (in_len
);
/* case bit (0x20) moved into the byte lane of position p0 within its word */
1962 const u32 tmp
= 0x20u
<< ((p0
& 3) * 8);
/* flip that bit only when generate_cmask () marks the byte as a letter */
1966 case 0: buf0
[0] ^= (tmp
& generate_cmask (buf0
[0])); break;
1967 case 1: buf0
[1] ^= (tmp
& generate_cmask (buf0
[1])); break;
1968 case 2: buf0
[2] ^= (tmp
& generate_cmask (buf0
[2])); break;
1969 case 3: buf0
[3] ^= (tmp
& generate_cmask (buf0
[3])); break;
1970 case 4: buf1
[0] ^= (tmp
& generate_cmask (buf1
[0])); break;
1971 case 5: buf1
[1] ^= (tmp
& generate_cmask (buf1
[1])); break;
1972 case 6: buf1
[2] ^= (tmp
& generate_cmask (buf1
[2])); break;
1973 case 7: buf1
[3] ^= (tmp
& generate_cmask (buf1
[3])); break;
/*
 * rule_op_mangle_reverse: rule wrapper around reverse_block (), called with
 * buf0/buf1 as both input and output so the buffer is rewritten in place.
 *
 * p0 and p1 are not used by the visible statement.
 * NOTE(review): the return statement is not present in this listing.
 */
1979 static u32
rule_op_mangle_reverse (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1981 reverse_block (buf0
, buf1
, buf0
, buf1
, in_len
);
/*
 * rule_op_mangle_dupeword: appends a copy of the word to itself.
 *
 * Returns in_len unchanged (no modification) when twice the length would not
 * stay below 32 bytes.
 *
 * append_block8 () is called with the current length as offset and buf0/buf1
 * in every buffer position, i.e. the buffer is both the data being extended
 * and the data being appended.
 *
 * NOTE(review): any update of out_len after the append and the final return
 * are not present in this listing.
 */
1986 static u32
rule_op_mangle_dupeword (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
1988 if ((in_len
+ in_len
) >= 32) return (in_len
);
1990 u32 out_len
= in_len
;
1992 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, buf0
, buf1
);
/*
 * rule_op_mangle_dupeword_times: appends the word to itself p0 times.
 *
 * Returns in_len unchanged (no modification) when the resulting length,
 * in_len * (p0 + 1), would not stay below 32 bytes.
 *
 * NOTE(review): tib40/tib41 are appended on every iteration, but their
 * declaration and initialisation are not present in this listing (the
 * embedded numbering jumps from 2003 to 2017). Presumably they hold a copy
 * of the original word -- confirm. The per-iteration update of out_len and
 * the final return are not visible either.
 */
1999 static u32
rule_op_mangle_dupeword_times (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2001 if (((in_len
* p0
) + in_len
) >= 32) return (in_len
);
2003 u32 out_len
= in_len
;
2017 for (u32 i
= 0; i
< p0
; i
++)
2019 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, tib40
, tib41
);
/*
 * rule_op_mangle_reflect: appends the output of reverse_block () for the
 * word to the word itself.
 *
 * Returns in_len unchanged (no modification) when twice the length would not
 * stay below 32 bytes.
 *
 * reverse_block () writes into tib40/tib41, which append_block8 () then adds
 * to buf0/buf1 at offset out_len.
 *
 * NOTE(review): the declaration of tib40/tib41, any update of out_len and
 * the final return are not present in this listing.
 */
2027 static u32
rule_op_mangle_reflect (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2029 if ((in_len
+ in_len
) >= 32) return (in_len
);
2031 u32 out_len
= in_len
;
2036 reverse_block (buf0
, buf1
, tib40
, tib41
, out_len
);
2038 append_block8 (out_len
, buf0
, buf1
, buf0
, buf1
, tib40
, tib41
);
/*
 * rule_op_mangle_append: appends the character p0 to the word via
 * append_block1 (), using the current length as the offset.
 *
 * Returns in_len unchanged (no modification) when in_len + 1 would not stay
 * below 32 bytes.
 *
 * NOTE(review): the increment of out_len and the final return are not
 * present in this listing.
 */
2045 static u32
rule_op_mangle_append (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2047 if ((in_len
+ 1) >= 32) return (in_len
);
2049 u32 out_len
= in_len
;
2051 append_block1 (out_len
, buf0
, buf1
, p0
);
/*
 * rule_op_mangle_prepend: inserts the character p0 in front of the word.
 *
 * Returns in_len unchanged (no modification) when in_len + 1 would not stay
 * below 32 bytes.
 *
 * rshift_block () is applied in place, then p0 is OR-ed into buf0[0].
 * NOTE(review): this relies on rshift_block () leaving the lowest byte of
 * buf0[0] zero and on p0 fitting in one byte -- neither is visible here.
 * The increment of out_len and the final return are not present in this
 * listing.
 */
2058 static u32
rule_op_mangle_prepend (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2060 if ((in_len
+ 1) >= 32) return (in_len
);
2062 u32 out_len
= in_len
;
2064 rshift_block (buf0
, buf1
, buf0
, buf1
);
2066 buf0
[0] = buf0
[0] | p0
;
/*
 * rule_op_mangle_rotate_left: moves the first character of the word to the
 * end.
 *
 * Returns in_len immediately for an empty word.
 *
 * NOTE(review): the final return statement is not present in this listing.
 */
2073 static u32
rule_op_mangle_rotate_left (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2075 if (in_len
== 0) return (in_len
);
/* index of the last character, and its bit offset inside its word */
2077 const u32 in_len1
= in_len
- 1;
2079 const u32 sh
= (in_len1
& 3) * 8;
/* save the first character, already moved to its destination byte lane */
2081 const u32 tmp
= (buf0
[0] & 0xff) << sh
;
/* shift the whole buffer one byte toward index 0, dropping byte 0 */
2083 lshift_block (buf0
, buf1
, buf0
, buf1
);
/* drop the saved character into the word that holds the last position */
2085 switch (in_len1
/ 4)
2087 case 0: buf0
[0] |= tmp
; break;
2088 case 1: buf0
[1] |= tmp
; break;
2089 case 2: buf0
[2] |= tmp
; break;
2090 case 3: buf0
[3] |= tmp
; break;
2091 case 4: buf1
[0] |= tmp
; break;
2092 case 5: buf1
[1] |= tmp
; break;
2093 case 6: buf1
[2] |= tmp
; break;
2094 case 7: buf1
[3] |= tmp
; break;
/*
 * rule_op_mangle_rotate_right: moves the last character of the word to the
 * front.
 *
 * Returns in_len immediately for an empty word.
 *
 * NOTE(review): the declaration of tmp, whatever happens between
 * rshift_block () and truncate_right () (presumably OR-ing tmp into
 * buf0[0]), and the final return are not present in this listing --
 * confirm against the full source.
 */
2100 static u32
rule_op_mangle_rotate_right (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2102 if (in_len
== 0) return (in_len
);
/* index of the last character, and its bit offset inside its word */
2104 const u32 in_len1
= in_len
- 1;
2106 const u32 sh
= (in_len1
& 3) * 8;
/* extract the last character into the low byte of tmp */
2110 switch (in_len1
/ 4)
2112 case 0: tmp
= (buf0
[0] >> sh
) & 0xff; break;
2113 case 1: tmp
= (buf0
[1] >> sh
) & 0xff; break;
2114 case 2: tmp
= (buf0
[2] >> sh
) & 0xff; break;
2115 case 3: tmp
= (buf0
[3] >> sh
) & 0xff; break;
2116 case 4: tmp
= (buf1
[0] >> sh
) & 0xff; break;
2117 case 5: tmp
= (buf1
[1] >> sh
) & 0xff; break;
2118 case 6: tmp
= (buf1
[2] >> sh
) & 0xff; break;
2119 case 7: tmp
= (buf1
[3] >> sh
) & 0xff; break;
2122 rshift_block (buf0
, buf1
, buf0
, buf1
);
/* cut the buffer back to the original length after the shift */
2126 truncate_right (buf0
, buf1
, in_len
);
/*
 * rule_op_mangle_delete_first: removes the first character of the word by
 * shifting the whole buffer one byte toward index 0 with lshift_block ().
 *
 * Returns in_len immediately for an empty word.
 *
 * NOTE(review): the final return is not present in this listing;
 * presumably it returns in_len1 (in_len - 1) -- confirm.
 */
2131 static u32
rule_op_mangle_delete_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2133 if (in_len
== 0) return (in_len
);
2135 const u32 in_len1
= in_len
- 1;
2137 lshift_block (buf0
, buf1
, buf0
, buf1
);
/*
 * rule_op_mangle_delete_last: removes the last character of the word.
 *
 * Returns in_len immediately for an empty word.
 *
 * NOTE(review): the final return is not present in this listing;
 * presumably it returns in_len1 (in_len - 1) -- confirm.
 */
2142 static u32
rule_op_mangle_delete_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2144 if (in_len
== 0) return (in_len
);
2146 const u32 in_len1
= in_len
- 1;
/* keeps only the bytes below the last character's lane within its word */
2148 const u32 tmp
= (1 << ((in_len1
& 3) * 8)) - 1;
/* apply the mask to the word that holds the last character */
2150 switch (in_len1
/ 4)
2152 case 0: buf0
[0] &= tmp
; break;
2153 case 1: buf0
[1] &= tmp
; break;
2154 case 2: buf0
[2] &= tmp
; break;
2155 case 3: buf0
[3] &= tmp
; break;
2156 case 4: buf1
[0] &= tmp
; break;
2157 case 5: buf1
[1] &= tmp
; break;
2158 case 6: buf1
[2] &= tmp
; break;
2159 case 7: buf1
[3] &= tmp
; break;
/*
 * rule_op_mangle_delete_at: removes the character at byte position p0.
 *
 * Returns in_len unchanged when p0 is not inside the string.
 *
 * Visible approach: lshift_block () writes a copy of the buffer, shifted one
 * byte toward index 0, into tib40/tib41; `ml` keeps the bytes below position
 * p0 inside the affected word.
 *
 * NOTE(review): this listing shows only the first half of each case
 * expression ("(word & ml)"). The part that merges in the shifted copy, the
 * remaining word assignments of each case, the switch header, the
 * declaration of tib40/tib41 and the final return are all absent -- confirm
 * against the full source before relying on this description.
 */
2165 static u32
rule_op_mangle_delete_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2167 if (p0
>= in_len
) return (in_len
);
2169 u32 out_len
= in_len
;
2174 lshift_block (buf0
, buf1
, tib40
, tib41
);
/* mask selecting the bytes that precede position p0 within its word */
2176 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
2181 case 0: buf0
[0] = (buf0
[0] & ml
)
2191 case 1: buf0
[1] = (buf0
[1] & ml
)
2200 case 2: buf0
[2] = (buf0
[2] & ml
)
2208 case 3: buf0
[3] = (buf0
[3] & ml
)
2215 case 4: buf1
[0] = (buf1
[0] & ml
)
2221 case 5: buf1
[1] = (buf1
[1] & ml
)
2226 case 6: buf1
[2] = (buf1
[2] & ml
)
2230 case 7: buf1
[3] = (buf1
[3] & ml
)
/*
 * rule_op_mangle_extract: keeps a substring of the word that starts at byte
 * position p0.
 *
 * Returns in_len unchanged (no modification) when p0 is not inside the
 * string or when p0 + p1 exceeds the string length.
 *
 * The buffer is shifted by p0 bytes with lshift_block_N () and then cut to
 * out_len bytes with truncate_right ().
 *
 * NOTE(review): the definition of out_len and the final return are not
 * present in this listing; presumably out_len is p1 -- confirm.
 */
2240 static u32
rule_op_mangle_extract (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2242 if (p0
>= in_len
) return (in_len
);
2244 if ((p0
+ p1
) > in_len
) return (in_len
);
2248 lshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
2250 truncate_right (buf0
, buf1
, out_len
);
/*
 * rule_op_mangle_omit: removes p1 characters starting at byte position p0.
 *
 * Returns in_len unchanged (no modification) when p0 is not inside the
 * string or when p0 + p1 exceeds the string length.
 *
 * Visible approach: lshift_block_N () writes a copy of the buffer, shifted
 * by p1 bytes, into tib40/tib41; `ml` keeps the bytes below position p0
 * inside the affected word.
 *
 * NOTE(review): this listing shows only the first half of each case
 * expression ("(word & ml)"). The part that merges in the shifted copy, the
 * remaining word assignments of each case, the switch header, the
 * declaration of tib40/tib41 and the final return are all absent -- confirm
 * against the full source before relying on this description.
 */
2255 static u32
rule_op_mangle_omit (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2257 if (p0
>= in_len
) return (in_len
);
2259 if ((p0
+ p1
) > in_len
) return (in_len
);
2261 u32 out_len
= in_len
;
2275 lshift_block_N (buf0
, buf1
, tib40
, tib41
, p1
);
/* mask selecting the bytes that precede position p0 within its word */
2277 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
2282 case 0: buf0
[0] = (buf0
[0] & ml
)
2292 case 1: buf0
[1] = (buf0
[1] & ml
)
2301 case 2: buf0
[2] = (buf0
[2] & ml
)
2309 case 3: buf0
[3] = (buf0
[3] & ml
)
2316 case 4: buf1
[0] = (buf1
[0] & ml
)
2322 case 5: buf1
[1] = (buf1
[1] & ml
)
2327 case 6: buf1
[2] = (buf1
[2] & ml
)
2331 case 7: buf1
[3] = (buf1
[3] & ml
)
/*
 * rule_op_mangle_insert: inserts the character p1 at byte position p0.
 *
 * Returns in_len unchanged (no modification) when p0 lies beyond the end of
 * the string or when in_len + 1 would not stay below 32 bytes. Note that
 * p0 == in_len is accepted.
 *
 * Visible approach: rshift_block () writes a shifted copy of the buffer into
 * tib40/tib41. The word containing position p0 is then rebuilt from three
 * parts: the original bytes below p0 (ml), the new character (p1n), and the
 * bytes above p0 taken from the shifted copy (mr).
 *
 * NOTE(review): only the first statement of each case is present in this
 * listing. The statements that copy the remaining shifted words, the switch
 * header, the declaration of tib40/tib41, the update of out_len and the
 * final return are absent -- confirm against the full source.
 */
2341 static u32
rule_op_mangle_insert (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2343 if (p0
> in_len
) return (in_len
);
2345 if ((in_len
+ 1) >= 32) return (in_len
);
2347 u32 out_len
= in_len
;
2352 rshift_block (buf0
, buf1
, tib40
, tib41
);
/* new character moved into the byte lane of position p0 */
2354 const u32 p1n
= p1
<< ((p0
& 3) * 8);
/* ml: bytes below position p0 within the word */
2356 const u32 ml
= (1 << ((p0
& 3) * 8)) - 1;
/* mr: bytes above position p0 within the word */
2358 const u32 mr
= 0xffffff00 << ((p0
& 3) * 8);
2362 case 0: buf0
[0] = (buf0
[0] & ml
) | p1n
| (tib40
[0] & mr
);
2371 case 1: buf0
[1] = (buf0
[1] & ml
) | p1n
| (tib40
[1] & mr
);
2379 case 2: buf0
[2] = (buf0
[2] & ml
) | p1n
| (tib40
[2] & mr
);
2386 case 3: buf0
[3] = (buf0
[3] & ml
) | p1n
| (tib40
[3] & mr
);
2392 case 4: buf1
[0] = (buf1
[0] & ml
) | p1n
| (tib41
[0] & mr
);
2397 case 5: buf1
[1] = (buf1
[1] & ml
) | p1n
| (tib41
[1] & mr
);
2401 case 6: buf1
[2] = (buf1
[2] & ml
) | p1n
| (tib41
[2] & mr
);
2404 case 7: buf1
[3] = (buf1
[3] & ml
) | p1n
| (tib41
[3] & mr
);
/*
 * rule_op_mangle_overstrike: overwrites the character at byte position p0
 * with p1.
 *
 * Returns in_len unchanged when p0 is not inside the string.
 *
 * NOTE(review): the switch header is not present in this listing; the eight
 * cases match a selector of p0 / 4 -- confirm. The final return is not
 * visible either. p1 is assumed to fit in one byte; a larger value would
 * spill into neighbouring byte lanes.
 */
2413 static u32
rule_op_mangle_overstrike (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2415 if (p0
>= in_len
) return (in_len
);
/* replacement character moved into the byte lane of position p0 */
2417 const u32 p1n
= p1
<< ((p0
& 3) * 8);
/* mask that clears only the byte lane of position p0 */
2419 const u32 m
= ~(0xffu
<< ((p0
& 3) * 8));
2423 case 0: buf0
[0] = (buf0
[0] & m
) | p1n
; break;
2424 case 1: buf0
[1] = (buf0
[1] & m
) | p1n
; break;
2425 case 2: buf0
[2] = (buf0
[2] & m
) | p1n
; break;
2426 case 3: buf0
[3] = (buf0
[3] & m
) | p1n
; break;
2427 case 4: buf1
[0] = (buf1
[0] & m
) | p1n
; break;
2428 case 5: buf1
[1] = (buf1
[1] & m
) | p1n
; break;
2429 case 6: buf1
[2] = (buf1
[2] & m
) | p1n
; break;
2430 case 7: buf1
[3] = (buf1
[3] & m
) | p1n
; break;
/*
 * rule_op_mangle_truncate_at: cuts the word down to its first p0 bytes via
 * truncate_right ().
 *
 * Returns in_len unchanged (no modification) when p0 is not smaller than
 * the current length. p1 is not used by the visible statements.
 *
 * NOTE(review): the final return is not present in this listing;
 * presumably it returns p0 -- confirm.
 */
2436 static u32
rule_op_mangle_truncate_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2438 if (p0
>= in_len
) return (in_len
);
2440 truncate_right (buf0
, buf1
, p0
);
/*
 * rule_op_mangle_replace: replaces every byte equal to p0 with p1.
 *
 * Two variants are visible:
 *
 *  - A __byte_perm_S () variant that walks positions 0 .. in_len - 1. For
 *    position i, selector 0x654k (k = i % 4) extracts byte k of the word
 *    into the low byte for comparison with p0; on a match, selectors
 *    0x7650 / 0x7604 / 0x7054 / 0x0654 rebuild the word with the low byte
 *    of p1 in lane 0 / 1 / 2 / 3 and the other three bytes kept.
 *
 *  - An IS_AMD / IS_GENERIC variant that broadcasts p0 and p1 into uchar4
 *    vectors and uses select () on each of the eight words. This variant
 *    processes all 32 bytes regardless of in_len.
 *
 * NOTE(review): the preprocessor guard of the first variant, its switch
 * header and break statements, the declaration of the uchar4 `tmp`, and the
 * final return are not present in this listing -- confirm against the full
 * source.
 */
2445 static u32
rule_op_mangle_replace (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2448 for (u32 i
= 0; i
< in_len
; i
++)
/* one case per byte position: compare the byte with p0, swap in p1 on match */
2452 case 0: if ((__byte_perm_S (buf0
[0], 0, 0x6540)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x7650);
2454 case 1: if ((__byte_perm_S (buf0
[0], 0, 0x6541)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x7604);
2456 case 2: if ((__byte_perm_S (buf0
[0], 0, 0x6542)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x7054);
2458 case 3: if ((__byte_perm_S (buf0
[0], 0, 0x6543)) == p0
) buf0
[0] = __byte_perm_S (p1
, buf0
[0], 0x0654);
2460 case 4: if ((__byte_perm_S (buf0
[1], 0, 0x6540)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x7650);
2462 case 5: if ((__byte_perm_S (buf0
[1], 0, 0x6541)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x7604);
2464 case 6: if ((__byte_perm_S (buf0
[1], 0, 0x6542)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x7054);
2466 case 7: if ((__byte_perm_S (buf0
[1], 0, 0x6543)) == p0
) buf0
[1] = __byte_perm_S (p1
, buf0
[1], 0x0654);
2468 case 8: if ((__byte_perm_S (buf0
[2], 0, 0x6540)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x7650);
2470 case 9: if ((__byte_perm_S (buf0
[2], 0, 0x6541)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x7604);
2472 case 10: if ((__byte_perm_S (buf0
[2], 0, 0x6542)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x7054);
2474 case 11: if ((__byte_perm_S (buf0
[2], 0, 0x6543)) == p0
) buf0
[2] = __byte_perm_S (p1
, buf0
[2], 0x0654);
2476 case 12: if ((__byte_perm_S (buf0
[3], 0, 0x6540)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x7650);
2478 case 13: if ((__byte_perm_S (buf0
[3], 0, 0x6541)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x7604);
2480 case 14: if ((__byte_perm_S (buf0
[3], 0, 0x6542)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x7054);
2482 case 15: if ((__byte_perm_S (buf0
[3], 0, 0x6543)) == p0
) buf0
[3] = __byte_perm_S (p1
, buf0
[3], 0x0654);
2484 case 16: if ((__byte_perm_S (buf1
[0], 0, 0x6540)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x7650);
2486 case 17: if ((__byte_perm_S (buf1
[0], 0, 0x6541)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x7604);
2488 case 18: if ((__byte_perm_S (buf1
[0], 0, 0x6542)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x7054);
2490 case 19: if ((__byte_perm_S (buf1
[0], 0, 0x6543)) == p0
) buf1
[0] = __byte_perm_S (p1
, buf1
[0], 0x0654);
2492 case 20: if ((__byte_perm_S (buf1
[1], 0, 0x6540)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x7650);
2494 case 21: if ((__byte_perm_S (buf1
[1], 0, 0x6541)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x7604);
2496 case 22: if ((__byte_perm_S (buf1
[1], 0, 0x6542)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x7054);
2498 case 23: if ((__byte_perm_S (buf1
[1], 0, 0x6543)) == p0
) buf1
[1] = __byte_perm_S (p1
, buf1
[1], 0x0654);
2500 case 24: if ((__byte_perm_S (buf1
[2], 0, 0x6540)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x7650);
2502 case 25: if ((__byte_perm_S (buf1
[2], 0, 0x6541)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x7604);
2504 case 26: if ((__byte_perm_S (buf1
[2], 0, 0x6542)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x7054);
2506 case 27: if ((__byte_perm_S (buf1
[2], 0, 0x6543)) == p0
) buf1
[2] = __byte_perm_S (p1
, buf1
[2], 0x0654);
2508 case 28: if ((__byte_perm_S (buf1
[3], 0, 0x6540)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x7650);
2510 case 29: if ((__byte_perm_S (buf1
[3], 0, 0x6541)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x7604);
2512 case 30: if ((__byte_perm_S (buf1
[3], 0, 0x6542)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x7054);
2514 case 31: if ((__byte_perm_S (buf1
[3], 0, 0x6543)) == p0
) buf1
[3] = __byte_perm_S (p1
, buf1
[3], 0x0654);
/* vector variant: compare four bytes at a time against p0, select p1 on match */
2520 #if defined IS_AMD || defined IS_GENERIC
2521 const uchar4 tmp0
= (uchar4
) (p0
);
2522 const uchar4 tmp1
= (uchar4
) (p1
);
2526 tmp
= as_uchar4 (buf0
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[0] = as_uint (tmp
);
2527 tmp
= as_uchar4 (buf0
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[1] = as_uint (tmp
);
2528 tmp
= as_uchar4 (buf0
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[2] = as_uint (tmp
);
2529 tmp
= as_uchar4 (buf0
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf0
[3] = as_uint (tmp
);
2530 tmp
= as_uchar4 (buf1
[0]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[0] = as_uint (tmp
);
2531 tmp
= as_uchar4 (buf1
[1]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[1] = as_uint (tmp
);
2532 tmp
= as_uchar4 (buf1
[2]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[2] = as_uint (tmp
);
2533 tmp
= as_uchar4 (buf1
[3]); tmp
= select (tmp
, tmp1
, tmp
== tmp0
); buf1
[3] = as_uint (tmp
);
/*
 * rule_op_mangle_purgechar: rule entry point with the same signature as the
 * other rule_op_mangle_* functions.
 *
 * NOTE(review): no body statements are present in this listing, so what the
 * function does to buf0/buf1 and what it returns cannot be determined here.
 * The name suggests it removes every occurrence of a character -- confirm
 * against the full source.
 */
2539 static u32
rule_op_mangle_purgechar (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
/*
 * rule_op_mangle_togglecase_rec: rule entry point with the same signature as
 * the other rule_op_mangle_* functions.
 *
 * NOTE(review): no body statements are present in this listing, so what the
 * function does to buf0/buf1 and what it returns cannot be determined here
 * -- confirm against the full source.
 */
2545 static u32
rule_op_mangle_togglecase_rec (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2551 static u32
rule_op_mangle_dupechar_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2553 if ( in_len
== 0) return (in_len
);
2554 if ((in_len
+ p0
) >= 32) return (in_len
);
2556 u32 out_len
= in_len
;
2558 const u32 tmp
= buf0
[0] & 0xFF;
2560 rshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
2565 case 1: buf0
[0] |= tmp
;
2567 case 2: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x5400);
2569 case 3: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x4000);
2571 case 4: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2573 case 5: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2576 case 6: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2577 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x5400);
2579 case 7: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2580 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x4000);
2582 case 8: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2583 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2585 case 9: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2586 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2589 case 10: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2590 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2591 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x5400);
2593 case 11: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2594 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2595 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x4000);
2597 case 12: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2598 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2599 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2601 case 13: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2602 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2603 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2606 case 14: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2607 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2608 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2609 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x5400);
2611 case 15: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2612 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2613 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2614 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x4000);
2616 case 16: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2617 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2618 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2619 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2621 case 17: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2622 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2623 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2624 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2627 case 18: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2628 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2629 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2630 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2631 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x5400);
2633 case 19: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2634 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2635 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2636 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2637 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x4000);
2639 case 20: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2640 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2641 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2642 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2643 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2645 case 21: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2646 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2647 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2648 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2649 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2652 case 22: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2653 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2654 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2655 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2656 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2657 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x5400);
2659 case 23: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2660 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2661 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2662 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2663 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2664 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x4000);
2666 case 24: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2667 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2668 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2669 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2670 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2671 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2673 case 25: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2674 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2675 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2676 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2677 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2678 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2681 case 26: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2682 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2683 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2684 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2685 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2686 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2687 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x5400);
2689 case 27: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2690 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2691 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2692 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2693 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2694 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2695 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x4000);
2697 case 28: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2698 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2699 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2700 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2701 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2702 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2703 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2705 case 29: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2706 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2707 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2708 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2709 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2710 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2711 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2714 case 30: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2715 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2716 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2717 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2718 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2719 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2720 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2721 buf1
[3] |= __byte_perm_S (tmp
, 0, 0x5400);
2723 case 31: buf0
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2724 buf0
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2725 buf0
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2726 buf0
[3] |= __byte_perm_S (tmp
, 0, 0x0000);
2727 buf1
[0] |= __byte_perm_S (tmp
, 0, 0x0000);
2728 buf1
[1] |= __byte_perm_S (tmp
, 0, 0x0000);
2729 buf1
[2] |= __byte_perm_S (tmp
, 0, 0x0000);
2730 buf1
[3] |= __byte_perm_S (tmp
, 0, 0x4000);
2735 #if defined IS_AMD || defined IS_GENERIC
2738 case 1: buf0
[0] |= tmp
<< 0;
2740 case 2: buf0
[0] |= tmp
<< 0 | tmp
<< 8;
2742 case 3: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2744 case 4: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2746 case 5: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2747 buf0
[1] |= tmp
<< 0;
2749 case 6: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2750 buf0
[1] |= tmp
<< 0 | tmp
<< 8;
2752 case 7: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2753 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2755 case 8: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2756 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2758 case 9: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2759 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2760 buf0
[2] |= tmp
<< 0;
2762 case 10: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2763 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2764 buf0
[2] |= tmp
<< 0 | tmp
<< 8;
2766 case 11: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2767 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2768 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2770 case 12: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2771 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2772 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2774 case 13: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2775 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2776 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2777 buf0
[3] |= tmp
<< 0;
2779 case 14: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2780 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2781 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2782 buf0
[3] |= tmp
<< 0 | tmp
<< 8;
2784 case 15: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2785 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2786 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2787 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2789 case 16: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2790 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2791 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2792 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2794 case 17: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2795 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2796 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2797 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2798 buf1
[0] |= tmp
<< 0;
2800 case 18: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2801 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2802 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2803 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2804 buf1
[0] |= tmp
<< 0 | tmp
<< 8;
2806 case 19: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2807 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2808 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2809 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2810 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2812 case 20: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2813 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2814 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2815 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2816 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2818 case 21: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2819 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2820 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2821 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2822 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2823 buf1
[1] |= tmp
<< 0;
2825 case 22: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2826 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2827 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2828 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2829 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2830 buf1
[1] |= tmp
<< 0 | tmp
<< 8;
2832 case 23: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2833 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2834 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2835 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2836 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2837 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2839 case 24: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2840 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2841 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2842 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2843 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2844 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2846 case 25: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2847 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2848 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2849 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2850 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2851 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2852 buf1
[2] |= tmp
<< 0;
2854 case 26: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2855 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2856 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2857 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2858 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2859 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2860 buf1
[2] |= tmp
<< 0 | tmp
<< 8;
2862 case 27: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2863 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2864 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2865 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2866 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2867 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2868 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2870 case 28: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2871 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2872 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2873 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2874 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2875 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2876 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2878 case 29: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2879 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2880 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2881 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2882 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2883 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2884 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2885 buf1
[3] |= tmp
<< 0;
2887 case 30: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2888 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2889 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2890 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2891 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2892 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2893 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2894 buf1
[3] |= tmp
<< 0 | tmp
<< 8;
2896 case 31: buf0
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2897 buf0
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2898 buf0
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2899 buf0
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2900 buf1
[0] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2901 buf1
[1] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2902 buf1
[2] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16 | tmp
<< 24;
2903 buf1
[3] |= tmp
<< 0 | tmp
<< 8 | tmp
<< 16;
2913 static u32
rule_op_mangle_dupechar_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2915 if ( in_len
== 0) return (in_len
);
2916 if ((in_len
+ p0
) >= 32) return (in_len
);
2918 const u32 in_len1
= in_len
- 1;
2920 const u32 sh
= (in_len1
& 3) * 8;
2924 switch (in_len1
/ 4)
2926 case 0: tmp
= (buf0
[0] >> sh
) & 0xff; break;
2927 case 1: tmp
= (buf0
[1] >> sh
) & 0xff; break;
2928 case 2: tmp
= (buf0
[2] >> sh
) & 0xff; break;
2929 case 3: tmp
= (buf0
[3] >> sh
) & 0xff; break;
2930 case 4: tmp
= (buf1
[0] >> sh
) & 0xff; break;
2931 case 5: tmp
= (buf1
[1] >> sh
) & 0xff; break;
2932 case 6: tmp
= (buf1
[2] >> sh
) & 0xff; break;
2933 case 7: tmp
= (buf1
[3] >> sh
) & 0xff; break;
2936 u32 out_len
= in_len
;
2938 for (u32 i
= 0; i
< p0
; i
++)
2940 append_block1 (out_len
, buf0
, buf1
, tmp
);
2948 static u32
rule_op_mangle_dupechar_all (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
2950 if ( in_len
== 0) return (in_len
);
2951 if ((in_len
+ in_len
) >= 32) return (in_len
);
2953 u32 out_len
= in_len
;
2959 tib40
[0] = __byte_perm_S (buf0
[0], 0, 0x1100);
2960 tib40
[1] = __byte_perm_S (buf0
[0], 0, 0x3322);
2961 tib40
[2] = __byte_perm_S (buf0
[1], 0, 0x1100);
2962 tib40
[3] = __byte_perm_S (buf0
[1], 0, 0x3322);
2963 tib41
[0] = __byte_perm_S (buf0
[2], 0, 0x1100);
2964 tib41
[1] = __byte_perm_S (buf0
[2], 0, 0x3322);
2965 tib41
[2] = __byte_perm_S (buf0
[3], 0, 0x1100);
2966 tib41
[3] = __byte_perm_S (buf0
[3], 0, 0x3322);
2978 #if defined IS_AMD || defined IS_GENERIC
2979 tib40
[0] = ((buf0
[0] & 0x000000FF) << 0) | ((buf0
[0] & 0x0000FF00) << 8);
2980 tib40
[1] = ((buf0
[0] & 0x00FF0000) >> 16) | ((buf0
[0] & 0xFF000000) >> 8);
2981 tib40
[2] = ((buf0
[1] & 0x000000FF) << 0) | ((buf0
[1] & 0x0000FF00) << 8);
2982 tib40
[3] = ((buf0
[1] & 0x00FF0000) >> 16) | ((buf0
[1] & 0xFF000000) >> 8);
2983 tib41
[0] = ((buf0
[2] & 0x000000FF) << 0) | ((buf0
[2] & 0x0000FF00) << 8);
2984 tib41
[1] = ((buf0
[2] & 0x00FF0000) >> 16) | ((buf0
[2] & 0xFF000000) >> 8);
2985 tib41
[2] = ((buf0
[3] & 0x000000FF) << 0) | ((buf0
[3] & 0x0000FF00) << 8);
2986 tib41
[3] = ((buf0
[3] & 0x00FF0000) >> 16) | ((buf0
[3] & 0xFF000000) >> 8);
2988 buf0
[0] = tib40
[0] | (tib40
[0] << 8);
2989 buf0
[1] = tib40
[1] | (tib40
[1] << 8);
2990 buf0
[2] = tib40
[2] | (tib40
[2] << 8);
2991 buf0
[3] = tib40
[3] | (tib40
[3] << 8);
2992 buf1
[0] = tib41
[0] | (tib41
[0] << 8);
2993 buf1
[1] = tib41
[1] | (tib41
[1] << 8);
2994 buf1
[2] = tib41
[2] | (tib41
[2] << 8);
2995 buf1
[3] = tib41
[3] | (tib41
[3] << 8);
2998 out_len
= out_len
+ out_len
;
3003 static u32
rule_op_mangle_switch_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3005 if (in_len
< 2) return (in_len
);
3008 buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x3201);
3011 #if defined IS_AMD || defined IS_GENERIC
3012 buf0
[0] = (buf0
[0] & 0xFFFF0000) | ((buf0
[0] << 8) & 0x0000FF00) | ((buf0
[0] >> 8) & 0x000000FF);
3018 static u32
rule_op_mangle_switch_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3020 if (in_len
< 2) return (in_len
);
3025 case 2: buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x5401);
3027 case 3: buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x4120);
3029 case 4: buf0
[0] = __byte_perm_S (buf0
[0], 0, 0x2310);
3031 case 5: buf0
[1] = __byte_perm_S (buf0
[1], buf0
[0], 0x7210);
3032 buf0
[0] = __byte_perm_S (buf0
[0], buf0
[1], 0x4210);
3033 buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x6543);
3035 case 6: buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x5401);
3037 case 7: buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x4120);
3039 case 8: buf0
[1] = __byte_perm_S (buf0
[1], 0, 0x2310);
3041 case 9: buf0
[2] = __byte_perm_S (buf0
[2], buf0
[1], 0x7210);
3042 buf0
[1] = __byte_perm_S (buf0
[1], buf0
[2], 0x4210);
3043 buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x6543);
3045 case 10: buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x5401);
3047 case 11: buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x4120);
3049 case 12: buf0
[2] = __byte_perm_S (buf0
[2], 0, 0x2310);
3051 case 13: buf0
[3] = __byte_perm_S (buf0
[3], buf0
[2], 0x7210);
3052 buf0
[2] = __byte_perm_S (buf0
[2], buf0
[3], 0x4210);
3053 buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x6543);
3055 case 14: buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x5401);
3057 case 15: buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x4120);
3059 case 16: buf0
[3] = __byte_perm_S (buf0
[3], 0, 0x2310);
3061 case 17: buf1
[0] = __byte_perm_S (buf1
[0], buf0
[3], 0x7210);
3062 buf0
[3] = __byte_perm_S (buf0
[3], buf1
[0], 0x4210);
3063 buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x6543);
3065 case 18: buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x5401);
3067 case 19: buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x4120);
3069 case 20: buf1
[0] = __byte_perm_S (buf1
[0], 0, 0x2310);
3071 case 21: buf1
[1] = __byte_perm_S (buf1
[1], buf1
[0], 0x7210);
3072 buf1
[0] = __byte_perm_S (buf1
[0], buf1
[1], 0x4210);
3073 buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x6543);
3075 case 22: buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x5401);
3077 case 23: buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x4120);
3079 case 24: buf1
[1] = __byte_perm_S (buf1
[1], 0, 0x2310);
3081 case 25: buf1
[2] = __byte_perm_S (buf1
[2], buf1
[1], 0x7210);
3082 buf1
[1] = __byte_perm_S (buf1
[1], buf1
[2], 0x4210);
3083 buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x6543);
3085 case 26: buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x5401);
3087 case 27: buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x4120);
3089 case 28: buf1
[2] = __byte_perm_S (buf1
[2], 0, 0x2310);
3091 case 29: buf1
[3] = __byte_perm_S (buf1
[3], buf1
[2], 0x7210);
3092 buf1
[2] = __byte_perm_S (buf1
[2], buf1
[3], 0x4210);
3093 buf1
[3] = __byte_perm_S (buf1
[3], 0, 0x6543);
3095 case 30: buf1
[3] = __byte_perm_S (buf1
[3], 0, 0x5401);
3097 case 31: buf1
[3] = __byte_perm_S (buf1
[3], 0, 0x4120);
3102 #if defined IS_AMD || defined IS_GENERIC
3105 case 2: buf0
[0] = ((buf0
[0] << 8) & 0x0000FF00) | ((buf0
[0] >> 8) & 0x000000FF);
3107 case 3: buf0
[0] = (buf0
[0] & 0x000000FF) | ((buf0
[0] << 8) & 0x00FF0000) | ((buf0
[0] >> 8) & 0x0000FF00);
3109 case 4: buf0
[0] = (buf0
[0] & 0x0000FFFF) | ((buf0
[0] << 8) & 0xFF000000) | ((buf0
[0] >> 8) & 0x00FF0000);
3111 case 5: buf0
[1] = (buf0
[0] & 0xFF000000) | buf0
[1];
3112 buf0
[0] = (buf0
[0] & 0x00FFFFFF) | (buf0
[1] << 24);
3113 buf0
[1] = (buf0
[1] >> 24);
3115 case 6: buf0
[1] = ((buf0
[1] << 8) & 0x0000FF00) | ((buf0
[1] >> 8) & 0x000000FF);
3117 case 7: buf0
[1] = (buf0
[1] & 0x000000FF) | ((buf0
[1] << 8) & 0x00FF0000) | ((buf0
[1] >> 8) & 0x0000FF00);
3119 case 8: buf0
[1] = (buf0
[1] & 0x0000FFFF) | ((buf0
[1] << 8) & 0xFF000000) | ((buf0
[1] >> 8) & 0x00FF0000);
3121 case 9: buf0
[2] = (buf0
[1] & 0xFF000000) | buf0
[2];
3122 buf0
[1] = (buf0
[1] & 0x00FFFFFF) | (buf0
[2] << 24);
3123 buf0
[2] = (buf0
[2] >> 24);
3125 case 10: buf0
[2] = ((buf0
[2] << 8) & 0x0000FF00) | ((buf0
[2] >> 8) & 0x000000FF);
3127 case 11: buf0
[2] = (buf0
[2] & 0x000000FF) | ((buf0
[2] << 8) & 0x00FF0000) | ((buf0
[2] >> 8) & 0x0000FF00);
3129 case 12: buf0
[2] = (buf0
[2] & 0x0000FFFF) | ((buf0
[2] << 8) & 0xFF000000) | ((buf0
[2] >> 8) & 0x00FF0000);
3131 case 13: buf0
[3] = (buf0
[2] & 0xFF000000) | buf0
[3];
3132 buf0
[2] = (buf0
[2] & 0x00FFFFFF) | (buf0
[3] << 24);
3133 buf0
[3] = (buf0
[3] >> 24);
3135 case 14: buf0
[3] = ((buf0
[3] << 8) & 0x0000FF00) | ((buf0
[3] >> 8) & 0x000000FF);
3137 case 15: buf0
[3] = (buf0
[3] & 0x000000FF) | ((buf0
[3] << 8) & 0x00FF0000) | ((buf0
[3] >> 8) & 0x0000FF00);
3139 case 16: buf0
[3] = (buf0
[3] & 0x0000FFFF) | ((buf0
[3] << 8) & 0xFF000000) | ((buf0
[3] >> 8) & 0x00FF0000);
3141 case 17: buf1
[0] = (buf0
[3] & 0xFF000000) | buf1
[0];
3142 buf0
[3] = (buf0
[3] & 0x00FFFFFF) | (buf1
[0] << 24);
3143 buf1
[0] = (buf1
[0] >> 24);
3145 case 18: buf1
[0] = ((buf1
[0] << 8) & 0x0000FF00) | ((buf1
[0] >> 8) & 0x000000FF);
3147 case 19: buf1
[0] = (buf1
[0] & 0x000000FF) | ((buf1
[0] << 8) & 0x00FF0000) | ((buf1
[0] >> 8) & 0x0000FF00);
3149 case 20: buf1
[0] = (buf1
[0] & 0x0000FFFF) | ((buf1
[0] << 8) & 0xFF000000) | ((buf1
[0] >> 8) & 0x00FF0000);
3151 case 21: buf1
[1] = (buf1
[0] & 0xFF000000) | buf1
[1];
3152 buf1
[0] = (buf1
[0] & 0x00FFFFFF) | (buf1
[1] << 24);
3153 buf1
[1] = (buf1
[1] >> 24);
3155 case 22: buf1
[1] = ((buf1
[1] << 8) & 0x0000FF00) | ((buf1
[1] >> 8) & 0x000000FF);
3157 case 23: buf1
[1] = (buf1
[1] & 0x000000FF) | ((buf1
[1] << 8) & 0x00FF0000) | ((buf1
[1] >> 8) & 0x0000FF00);
3159 case 24: buf1
[1] = (buf1
[1] & 0x0000FFFF) | ((buf1
[1] << 8) & 0xFF000000) | ((buf1
[1] >> 8) & 0x00FF0000);
3161 case 25: buf1
[2] = (buf1
[1] & 0xFF000000) | buf1
[2];
3162 buf1
[1] = (buf1
[1] & 0x00FFFFFF) | (buf1
[2] << 24);
3163 buf1
[2] = (buf1
[2] >> 24);
3165 case 26: buf1
[2] = ((buf1
[2] << 8) & 0x0000FF00) | ((buf1
[2] >> 8) & 0x000000FF);
3167 case 27: buf1
[2] = (buf1
[2] & 0x000000FF) | ((buf1
[2] << 8) & 0x00FF0000) | ((buf1
[2] >> 8) & 0x0000FF00);
3169 case 28: buf1
[2] = (buf1
[2] & 0x0000FFFF) | ((buf1
[2] << 8) & 0xFF000000) | ((buf1
[2] >> 8) & 0x00FF0000);
3171 case 29: buf1
[3] = (buf1
[2] & 0xFF000000) | buf1
[3];
3172 buf1
[2] = (buf1
[2] & 0x00FFFFFF) | (buf1
[3] << 24);
3173 buf1
[3] = (buf1
[3] >> 24);
3175 case 30: buf1
[3] = ((buf1
[3] << 8) & 0x0000FF00) | ((buf1
[3] >> 8) & 0x000000FF);
3177 case 31: buf1
[3] = (buf1
[3] & 0x000000FF) | ((buf1
[3] << 8) & 0x00FF0000) | ((buf1
[3] >> 8) & 0x0000FF00);
3185 static u32
rule_op_mangle_switch_at (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
3187 if (p0
>= in_len
) return (in_len
);
3188 if (p1
>= in_len
) return (in_len
);
3196 case 0: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6540);
3198 case 1: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6541);
3200 case 2: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6542);
3202 case 3: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6543);
3204 case 4: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6540);
3206 case 5: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6541);
3208 case 6: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6542);
3210 case 7: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6543);
3212 case 8: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6540);
3214 case 9: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6541);
3216 case 10: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6542);
3218 case 11: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6543);
3220 case 12: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6540);
3222 case 13: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6541);
3224 case 14: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6542);
3226 case 15: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6543);
3228 case 16: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6540);
3230 case 17: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6541);
3232 case 18: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6542);
3234 case 19: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6543);
3236 case 20: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6540);
3238 case 21: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6541);
3240 case 22: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6542);
3242 case 23: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6543);
3244 case 24: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6540);
3246 case 25: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6541);
3248 case 26: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6542);
3250 case 27: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6543);
3252 case 28: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6540);
3254 case 29: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6541);
3256 case 30: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6542);
3258 case 31: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6543);
3264 case 0: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6540);
3265 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x7650);
3267 case 1: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6541);
3268 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x7604);
3270 case 2: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6542);
3271 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x7054);
3273 case 3: tmp1
= __byte_perm_S (buf0
[0], 0, 0x6543);
3274 buf0
[0] = __byte_perm_S (tmp0
, buf0
[0], 0x0654);
3276 case 4: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6540);
3277 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x7650);
3279 case 5: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6541);
3280 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x7604);
3282 case 6: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6542);
3283 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x7054);
3285 case 7: tmp1
= __byte_perm_S (buf0
[1], 0, 0x6543);
3286 buf0
[1] = __byte_perm_S (tmp0
, buf0
[1], 0x0654);
3288 case 8: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6540);
3289 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x7650);
3291 case 9: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6541);
3292 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x7604);
3294 case 10: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6542);
3295 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x7054);
3297 case 11: tmp1
= __byte_perm_S (buf0
[2], 0, 0x6543);
3298 buf0
[2] = __byte_perm_S (tmp0
, buf0
[2], 0x0654);
3300 case 12: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6540);
3301 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x7650);
3303 case 13: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6541);
3304 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x7604);
3306 case 14: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6542);
3307 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x7054);
3309 case 15: tmp1
= __byte_perm_S (buf0
[3], 0, 0x6543);
3310 buf0
[3] = __byte_perm_S (tmp0
, buf0
[3], 0x0654);
3312 case 16: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6540);
3313 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x7650);
3315 case 17: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6541);
3316 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x7604);
3318 case 18: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6542);
3319 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x7054);
3321 case 19: tmp1
= __byte_perm_S (buf1
[0], 0, 0x6543);
3322 buf1
[0] = __byte_perm_S (tmp0
, buf1
[0], 0x0654);
3324 case 20: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6540);
3325 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x7650);
3327 case 21: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6541);
3328 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x7604);
3330 case 22: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6542);
3331 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x7054);
3333 case 23: tmp1
= __byte_perm_S (buf1
[1], 0, 0x6543);
3334 buf1
[1] = __byte_perm_S (tmp0
, buf1
[1], 0x0654);
3336 case 24: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6540);
3337 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x7650);
3339 case 25: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6541);
3340 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x7604);
3342 case 26: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6542);
3343 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x7054);
3345 case 27: tmp1
= __byte_perm_S (buf1
[2], 0, 0x6543);
3346 buf1
[2] = __byte_perm_S (tmp0
, buf1
[2], 0x0654);
3348 case 28: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6540);
3349 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x7650);
3351 case 29: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6541);
3352 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x7604);
3354 case 30: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6542);
3355 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x7054);
3357 case 31: tmp1
= __byte_perm_S (buf1
[3], 0, 0x6543);
3358 buf1
[3] = __byte_perm_S (tmp0
, buf1
[3], 0x0654);
3364 case 0: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x7650);
3366 case 1: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x7604);
3368 case 2: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x7054);
3370 case 3: buf0
[0] = __byte_perm_S (tmp1
, buf0
[0], 0x0654);
3372 case 4: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x7650);
3374 case 5: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x7604);
3376 case 6: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x7054);
3378 case 7: buf0
[1] = __byte_perm_S (tmp1
, buf0
[1], 0x0654);
3380 case 8: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x7650);
3382 case 9: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x7604);
3384 case 10: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x7054);
3386 case 11: buf0
[2] = __byte_perm_S (tmp1
, buf0
[2], 0x0654);
3388 case 12: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x7650);
3390 case 13: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x7604);
3392 case 14: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x7054);
3394 case 15: buf0
[3] = __byte_perm_S (tmp1
, buf0
[3], 0x0654);
3396 case 16: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x7650);
3398 case 17: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x7604);
3400 case 18: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x7054);
3402 case 19: buf1
[0] = __byte_perm_S (tmp1
, buf1
[0], 0x0654);
3404 case 20: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x7650);
3406 case 21: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x7604);
3408 case 22: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x7054);
3410 case 23: buf1
[1] = __byte_perm_S (tmp1
, buf1
[1], 0x0654);
3412 case 24: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x7650);
3414 case 25: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x7604);
3416 case 26: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x7054);
3418 case 27: buf1
[2] = __byte_perm_S (tmp1
, buf1
[2], 0x0654);
3420 case 28: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x7650);
3422 case 29: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x7604);
3424 case 30: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x7054);
3426 case 31: buf1
[3] = __byte_perm_S (tmp1
, buf1
[3], 0x0654);
3431 #if defined IS_AMD || defined IS_GENERIC
3434 case 0: tmp0
= (buf0
[0] >> 0) & 0xFF;
3436 case 1: tmp0
= (buf0
[0] >> 8) & 0xFF;
3438 case 2: tmp0
= (buf0
[0] >> 16) & 0xFF;
3440 case 3: tmp0
= (buf0
[0] >> 24) & 0xFF;
3442 case 4: tmp0
= (buf0
[1] >> 0) & 0xFF;
3444 case 5: tmp0
= (buf0
[1] >> 8) & 0xFF;
3446 case 6: tmp0
= (buf0
[1] >> 16) & 0xFF;
3448 case 7: tmp0
= (buf0
[1] >> 24) & 0xFF;
3450 case 8: tmp0
= (buf0
[2] >> 0) & 0xFF;
3452 case 9: tmp0
= (buf0
[2] >> 8) & 0xFF;
3454 case 10: tmp0
= (buf0
[2] >> 16) & 0xFF;
3456 case 11: tmp0
= (buf0
[2] >> 24) & 0xFF;
3458 case 12: tmp0
= (buf0
[3] >> 0) & 0xFF;
3460 case 13: tmp0
= (buf0
[3] >> 8) & 0xFF;
3462 case 14: tmp0
= (buf0
[3] >> 16) & 0xFF;
3464 case 15: tmp0
= (buf0
[3] >> 24) & 0xFF;
3466 case 16: tmp0
= (buf1
[0] >> 0) & 0xFF;
3468 case 17: tmp0
= (buf1
[0] >> 8) & 0xFF;
3470 case 18: tmp0
= (buf1
[0] >> 16) & 0xFF;
3472 case 19: tmp0
= (buf1
[0] >> 24) & 0xFF;
3474 case 20: tmp0
= (buf1
[1] >> 0) & 0xFF;
3476 case 21: tmp0
= (buf1
[1] >> 8) & 0xFF;
3478 case 22: tmp0
= (buf1
[1] >> 16) & 0xFF;
3480 case 23: tmp0
= (buf1
[1] >> 24) & 0xFF;
3482 case 24: tmp0
= (buf1
[2] >> 0) & 0xFF;
3484 case 25: tmp0
= (buf1
[2] >> 8) & 0xFF;
3486 case 26: tmp0
= (buf1
[2] >> 16) & 0xFF;
3488 case 27: tmp0
= (buf1
[2] >> 24) & 0xFF;
3490 case 28: tmp0
= (buf1
[3] >> 0) & 0xFF;
3492 case 29: tmp0
= (buf1
[3] >> 8) & 0xFF;
3494 case 30: tmp0
= (buf1
[3] >> 16) & 0xFF;
3496 case 31: tmp0
= (buf1
[3] >> 24) & 0xFF;
3502 case 0: tmp1
= (buf0
[0] >> 0) & 0xff;
3503 buf0
[0] = (buf0
[0] & 0xffffff00) | tmp0
<< 0;
3505 case 1: tmp1
= (buf0
[0] >> 8) & 0xff;
3506 buf0
[0] = (buf0
[0] & 0xffff00ff) | tmp0
<< 8;
3508 case 2: tmp1
= (buf0
[0] >> 16) & 0xff;
3509 buf0
[0] = (buf0
[0] & 0xff00ffff) | tmp0
<< 16;
3511 case 3: tmp1
= (buf0
[0] >> 24) & 0xff;
3512 buf0
[0] = (buf0
[0] & 0x00ffffff) | tmp0
<< 24;
3514 case 4: tmp1
= (buf0
[1] >> 0) & 0xff;
3515 buf0
[1] = (buf0
[1] & 0xffffff00) | tmp0
<< 0;
3517 case 5: tmp1
= (buf0
[1] >> 8) & 0xff;
3518 buf0
[1] = (buf0
[1] & 0xffff00ff) | tmp0
<< 8;
3520 case 6: tmp1
= (buf0
[1] >> 16) & 0xff;
3521 buf0
[1] = (buf0
[1] & 0xff00ffff) | tmp0
<< 16;
3523 case 7: tmp1
= (buf0
[1] >> 24) & 0xff;
3524 buf0
[1] = (buf0
[1] & 0x00ffffff) | tmp0
<< 24;
3526 case 8: tmp1
= (buf0
[2] >> 0) & 0xff;
3527 buf0
[2] = (buf0
[2] & 0xffffff00) | tmp0
<< 0;
3529 case 9: tmp1
= (buf0
[2] >> 8) & 0xff;
3530 buf0
[2] = (buf0
[2] & 0xffff00ff) | tmp0
<< 8;
3532 case 10: tmp1
= (buf0
[2] >> 16) & 0xff;
3533 buf0
[2] = (buf0
[2] & 0xff00ffff) | tmp0
<< 16;
3535 case 11: tmp1
= (buf0
[2] >> 24) & 0xff;
3536 buf0
[2] = (buf0
[2] & 0x00ffffff) | tmp0
<< 24;
3538 case 12: tmp1
= (buf0
[3] >> 0) & 0xff;
3539 buf0
[3] = (buf0
[3] & 0xffffff00) | tmp0
<< 0;
3541 case 13: tmp1
= (buf0
[3] >> 8) & 0xff;
3542 buf0
[3] = (buf0
[3] & 0xffff00ff) | tmp0
<< 8;
3544 case 14: tmp1
= (buf0
[3] >> 16) & 0xff;
3545 buf0
[3] = (buf0
[3] & 0xff00ffff) | tmp0
<< 16;
3547 case 15: tmp1
= (buf0
[3] >> 24) & 0xff;
3548 buf0
[3] = (buf0
[3] & 0x00ffffff) | tmp0
<< 24;
3550 case 16: tmp1
= (buf1
[0] >> 0) & 0xff;
3551 buf1
[0] = (buf1
[0] & 0xffffff00) | tmp0
<< 0;
3553 case 17: tmp1
= (buf1
[0] >> 8) & 0xff;
3554 buf1
[0] = (buf1
[0] & 0xffff00ff) | tmp0
<< 8;
3556 case 18: tmp1
= (buf1
[0] >> 16) & 0xff;
3557 buf1
[0] = (buf1
[0] & 0xff00ffff) | tmp0
<< 16;
3559 case 19: tmp1
= (buf1
[0] >> 24) & 0xff;
3560 buf1
[0] = (buf1
[0] & 0x00ffffff) | tmp0
<< 24;
3562 case 20: tmp1
= (buf1
[1] >> 0) & 0xff;
3563 buf1
[1] = (buf1
[1] & 0xffffff00) | tmp0
<< 0;
3565 case 21: tmp1
= (buf1
[1] >> 8) & 0xff;
3566 buf1
[1] = (buf1
[1] & 0xffff00ff) | tmp0
<< 8;
3568 case 22: tmp1
= (buf1
[1] >> 16) & 0xff;
3569 buf1
[1] = (buf1
[1] & 0xff00ffff) | tmp0
<< 16;
3571 case 23: tmp1
= (buf1
[1] >> 24) & 0xff;
3572 buf1
[1] = (buf1
[1] & 0x00ffffff) | tmp0
<< 24;
3574 case 24: tmp1
= (buf1
[2] >> 0) & 0xff;
3575 buf1
[2] = (buf1
[2] & 0xffffff00) | tmp0
<< 0;
3577 case 25: tmp1
= (buf1
[2] >> 8) & 0xff;
3578 buf1
[2] = (buf1
[2] & 0xffff00ff) | tmp0
<< 8;
3580 case 26: tmp1
= (buf1
[2] >> 16) & 0xff;
3581 buf1
[2] = (buf1
[2] & 0xff00ffff) | tmp0
<< 16;
3583 case 27: tmp1
= (buf1
[2] >> 24) & 0xff;
3584 buf1
[2] = (buf1
[2] & 0x00ffffff) | tmp0
<< 24;
3586 case 28: tmp1
= (buf1
[3] >> 0) & 0xff;
3587 buf1
[3] = (buf1
[3] & 0xffffff00) | tmp0
<< 0;
3589 case 29: tmp1
= (buf1
[3] >> 8) & 0xff;
3590 buf1
[3] = (buf1
[3] & 0xffff00ff) | tmp0
<< 8;
3592 case 30: tmp1
= (buf1
[3] >> 16) & 0xff;
3593 buf1
[3] = (buf1
[3] & 0xff00ffff) | tmp0
<< 16;
3595 case 31: tmp1
= (buf1
[3] >> 24) & 0xff;
3596 buf1
[3] = (buf1
[3] & 0x00ffffff) | tmp0
<< 24;
3602 case 0: buf0
[0] = (buf0
[0] & 0xffffff00) | tmp1
<< 0;
3604 case 1: buf0
[0] = (buf0
[0] & 0xffff00ff) | tmp1
<< 8;
3606 case 2: buf0
[0] = (buf0
[0] & 0xff00ffff) | tmp1
<< 16;
3608 case 3: buf0
[0] = (buf0
[0] & 0x00ffffff) | tmp1
<< 24;
3610 case 4: buf0
[1] = (buf0
[1] & 0xffffff00) | tmp1
<< 0;
3612 case 5: buf0
[1] = (buf0
[1] & 0xffff00ff) | tmp1
<< 8;
3614 case 6: buf0
[1] = (buf0
[1] & 0xff00ffff) | tmp1
<< 16;
3616 case 7: buf0
[1] = (buf0
[1] & 0x00ffffff) | tmp1
<< 24;
3618 case 8: buf0
[2] = (buf0
[2] & 0xffffff00) | tmp1
<< 0;
3620 case 9: buf0
[2] = (buf0
[2] & 0xffff00ff) | tmp1
<< 8;
3622 case 10: buf0
[2] = (buf0
[2] & 0xff00ffff) | tmp1
<< 16;
3624 case 11: buf0
[2] = (buf0
[2] & 0x00ffffff) | tmp1
<< 24;
3626 case 12: buf0
[3] = (buf0
[3] & 0xffffff00) | tmp1
<< 0;
3628 case 13: buf0
[3] = (buf0
[3] & 0xffff00ff) | tmp1
<< 8;
3630 case 14: buf0
[3] = (buf0
[3] & 0xff00ffff) | tmp1
<< 16;
3632 case 15: buf0
[3] = (buf0
[3] & 0x00ffffff) | tmp1
<< 24;
3634 case 16: buf1
[0] = (buf1
[0] & 0xffffff00) | tmp1
<< 0;
3636 case 17: buf1
[0] = (buf1
[0] & 0xffff00ff) | tmp1
<< 8;
3638 case 18: buf1
[0] = (buf1
[0] & 0xff00ffff) | tmp1
<< 16;
3640 case 19: buf1
[0] = (buf1
[0] & 0x00ffffff) | tmp1
<< 24;
3642 case 20: buf1
[1] = (buf1
[1] & 0xffffff00) | tmp1
<< 0;
3644 case 21: buf1
[1] = (buf1
[1] & 0xffff00ff) | tmp1
<< 8;
3646 case 22: buf1
[1] = (buf1
[1] & 0xff00ffff) | tmp1
<< 16;
3648 case 23: buf1
[1] = (buf1
[1] & 0x00ffffff) | tmp1
<< 24;
3650 case 24: buf1
[2] = (buf1
[2] & 0xffffff00) | tmp1
<< 0;
3652 case 25: buf1
[2] = (buf1
[2] & 0xffff00ff) | tmp1
<< 8;
3654 case 26: buf1
[2] = (buf1
[2] & 0xff00ffff) | tmp1
<< 16;
3656 case 27: buf1
[2] = (buf1
[2] & 0x00ffffff) | tmp1
<< 24;
3658 case 28: buf1
[3] = (buf1
[3] & 0xffffff00) | tmp1
<< 0;
3660 case 29: buf1
[3] = (buf1
[3] & 0xffff00ff) | tmp1
<< 8;
3662 case 30: buf1
[3] = (buf1
[3] & 0xff00ffff) | tmp1
<< 16;
3664 case 31: buf1
[3] = (buf1
[3] & 0x00ffffff) | tmp1
<< 24;
/*
 * Rule op: shift the byte at position p0 left by one bit.
 *
 * p0      byte position of the target character (byte 0 is the low byte of buf0[0])
 * p1      unused
 * buf0    words 0..3 of the buffer, modified in place
 * buf1    words 4..7 of the buffer, modified in place
 * in_len  current length in bytes
 *
 * Returns in_len (the length never changes). Nothing is modified when
 * p0 >= in_len.
 *
 * NOTE(review): the `ml` definition, the switch selector and the trailing
 * return were absent from the reviewed listing and are restored here from how
 * the visible statements use them - confirm against upstream.
 */
static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  // mr selects the target byte lane inside its 32-bit word, ml keeps the other three lanes
  const u32 mr = 0xffu << ((p0 & 3) * 8);
  const u32 ml = ~mr;

  // the trailing "& mr" drops the bit that was shifted out of the lane
  switch (p0 / 4)
  {
    case  0:  buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break;
    case  1:  buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break;
    case  2:  buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break;
    case  3:  buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break;
    case  4:  buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break;
    case  5:  buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break;
    case  6:  buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break;
    case  7:  buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break;
  }

  return (in_len);
}
/*
 * Rule op: shift the byte at position p0 right by one bit.
 *
 * p0      byte position of the target character (byte 0 is the low byte of buf0[0])
 * p1      unused
 * buf0    words 0..3 of the buffer, modified in place
 * buf1    words 4..7 of the buffer, modified in place
 * in_len  current length in bytes
 *
 * Returns in_len (the length never changes). Nothing is modified when
 * p0 >= in_len.
 *
 * NOTE(review): the `ml` definition, the switch selector and the trailing
 * return were absent from the reviewed listing and are restored here from how
 * the visible statements use them - confirm against upstream.
 */
static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  // mr selects the target byte lane inside its 32-bit word, ml keeps the other three lanes
  const u32 mr = 0xffu << ((p0 & 3) * 8);
  const u32 ml = ~mr;

  // the trailing "& mr" drops the bit that slid into the neighbouring lower lane
  switch (p0 / 4)
  {
    case  0:  buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break;
    case  1:  buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break;
    case  2:  buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break;
    case  3:  buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break;
    case  4:  buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break;
    case  5:  buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break;
    case  6:  buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break;
    case  7:  buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break;
  }

  return (in_len);
}
/*
 * Rule op: add one to the byte at position p0, wrapping inside the byte.
 *
 * p0      byte position of the target character (byte 0 is the low byte of buf0[0])
 * p1      unused
 * buf0    words 0..3 of the buffer, modified in place
 * buf1    words 4..7 of the buffer, modified in place
 * in_len  current length in bytes
 *
 * Returns in_len (the length never changes). Nothing is modified when
 * p0 >= in_len.
 *
 * NOTE(review): the `ml` definition, the switch selector and the trailing
 * return were absent from the reviewed listing and are restored here from how
 * the visible statements use them - confirm against upstream.
 */
static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  // mr selects the target byte lane inside its 32-bit word, ml keeps the other three lanes
  const u32 mr = 0xffu << ((p0 & 3) * 8);
  const u32 ml = ~mr;

  // the value 1 positioned in the target lane
  const u32 n = 0x01010101 & mr;

  // the trailing "& mr" discards the carry, so 0xff becomes 0x00 without touching neighbours
  switch (p0 / 4)
  {
    case  0:  buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break;
    case  1:  buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break;
    case  2:  buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break;
    case  3:  buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break;
    case  4:  buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break;
    case  5:  buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break;
    case  6:  buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break;
    case  7:  buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break;
  }

  return (in_len);
}
/*
 * Rule op: subtract one from the byte at position p0, wrapping inside the byte.
 *
 * p0      byte position of the target character (byte 0 is the low byte of buf0[0])
 * p1      unused
 * buf0    words 0..3 of the buffer, modified in place
 * buf1    words 4..7 of the buffer, modified in place
 * in_len  current length in bytes
 *
 * Returns in_len (the length never changes). Nothing is modified when
 * p0 >= in_len.
 *
 * NOTE(review): the `ml` definition, the switch selector and the trailing
 * return were absent from the reviewed listing and are restored here from how
 * the visible statements use them - confirm against upstream.
 */
static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  // mr selects the target byte lane inside its 32-bit word, ml keeps the other three lanes
  const u32 mr = 0xffu << ((p0 & 3) * 8);
  const u32 ml = ~mr;

  // the value 1 positioned in the target lane
  const u32 n = 0x01010101 & mr;

  // the trailing "& mr" discards the borrow, so 0x00 becomes 0xff without touching neighbours
  switch (p0 / 4)
  {
    case  0:  buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break;
    case  1:  buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break;
    case  2:  buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break;
    case  3:  buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break;
    case  4:  buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break;
    case  5:  buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break;
    case  6:  buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break;
    case  7:  buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break;
  }

  return (in_len);
}
/*
 * Rule op: overwrite the byte at position p0 with the byte at position p0 + 1.
 *
 * p0      byte position of the character to overwrite
 * p1      unused
 * buf0    words 0..3 of the buffer, modified in place
 * buf1    words 4..7 of the buffer, modified in place
 * in_len  current length in bytes
 *
 * Returns in_len (the length never changes). Nothing is modified when
 * p0 + 1 >= in_len, i.e. when there is no following character.
 *
 * NOTE(review): the tib40/tib41 and `ml` declarations, the switch selector and
 * the trailing return were absent from the reviewed listing and are restored
 * here from how the visible statements use them - confirm against upstream.
 */
static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if ((p0 + 1) >= in_len) return (in_len);

  // scratch copy of the buffer moved down by one byte, so byte p0 of the copy is byte p0 + 1 of the input
  u32 tib40[4];
  u32 tib41[4];

  lshift_block (buf0, buf1, tib40, tib41);

  // mr selects the target byte lane inside its 32-bit word, ml keeps the other three lanes
  const u32 mr = 0xffu << ((p0 & 3) * 8);
  const u32 ml = ~mr;

  switch (p0 / 4)
  {
    case  0:  buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
    case  1:  buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
    case  2:  buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
    case  3:  buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
    case  4:  buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
    case  5:  buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
    case  6:  buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
    case  7:  buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
  }

  return (in_len);
}
/*
 * Rule op: overwrite the byte at position p0 with the byte found at the same
 * position of an rshift_block()-ed copy of the buffer (presumably the byte at
 * p0 - 1; rshift_block's body is not visible here - TODO confirm).
 *
 * p0      byte position of the character to overwrite
 * p1      unused
 * buf0    words 0..3 of the buffer, modified in place
 * buf1    words 4..7 of the buffer, modified in place
 * in_len  current length in bytes
 *
 * Returns in_len (the length never changes). Nothing is modified when
 * p0 == 0 or p0 >= in_len.
 *
 * NOTE(review): the tib40/tib41 and `ml` declarations, the switch selector and
 * the trailing return were absent from the reviewed listing and are restored
 * here from how the visible statements use them - confirm against upstream.
 */
static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 == 0) return (in_len);

  if (p0 >= in_len) return (in_len);

  // scratch copy of the buffer produced by rshift_block
  u32 tib40[4];
  u32 tib41[4];

  rshift_block (buf0, buf1, tib40, tib41);

  // mr selects the target byte lane inside its 32-bit word, ml keeps the other three lanes
  const u32 mr = 0xffu << ((p0 & 3) * 8);
  const u32 ml = ~mr;

  switch (p0 / 4)
  {
    case  0:  buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
    case  1:  buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
    case  2:  buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
    case  3:  buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
    case  4:  buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
    case  5:  buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
    case  6:  buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
    case  7:  buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
  }

  return (in_len);
}
/*
 * Rule op "dupeblock first". As far as the visible statements show: a scratch
 * block (tib40/tib41) is passed through truncate_right() with length p0, the
 * buffer is passed through rshift_block_N() by p0 in place, and the scratch
 * block is then OR-ed into the buffer word by word.
 *
 * Returns in_len unchanged, without touching the buffer, when p0 > in_len or
 * when in_len + p0 >= 32.
 *
 * NOTE(review): the declaration and initialisation of tib40/tib41, any update
 * of out_len and the final return are not visible in this listing - confirm
 * against the full file. Presumably tib40/tib41 start as a copy of buf0/buf1,
 * so the first p0 bytes end up duplicated at the front.
 */
3820 static u32
rule_op_mangle_dupeblock_first (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
/* guard: p0 must not exceed the current length */
3822 if (p0
> in_len
) return (in_len
);
/* guard: reject the op when in_len + p0 would reach 32 or more */
3824 if ((in_len
+ p0
) >= 32) return (in_len
);
3826 u32 out_len
= in_len
;
/* presumably keeps only the first p0 bytes of the scratch block - TODO confirm */
3840 truncate_right (tib40
, tib41
, p0
);
/* in-place call: input and output are the same arrays */
3842 rshift_block_N (buf0
, buf1
, buf0
, buf1
, p0
);
/* merge the scratch block into the buffer */
3844 buf0
[0] |= tib40
[0];
3845 buf0
[1] |= tib40
[1];
3846 buf0
[2] |= tib40
[2];
3847 buf0
[3] |= tib40
[3];
3848 buf1
[0] |= tib41
[0];
3849 buf1
[1] |= tib41
[1];
3850 buf1
[2] |= tib41
[2];
3851 buf1
[3] |= tib41
[3];
/*
 * Rule op "dupeblock last". As far as the visible statements show:
 * rshift_block_N() writes a copy of the buffer, shifted by p0, into a scratch
 * block (tib40/tib41); truncate_left() is applied to that scratch block with
 * the current length; the result is OR-ed into the buffer word by word.
 *
 * Returns in_len unchanged, without touching the buffer, when p0 > in_len or
 * when in_len + p0 >= 32.
 *
 * NOTE(review): the declaration of tib40/tib41, any update of out_len and the
 * final return are not visible in this listing - confirm against the full
 * file. Presumably the last p0 bytes end up appended after the original end.
 */
3858 static u32
rule_op_mangle_dupeblock_last (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
/* guard: p0 must not exceed the current length */
3860 if (p0
> in_len
) return (in_len
);
/* guard: reject the op when in_len + p0 would reach 32 or more */
3862 if ((in_len
+ p0
) >= 32) return (in_len
);
3864 u32 out_len
= in_len
;
/* scratch block = buffer shifted by p0; the buffer itself is left untouched here */
3869 rshift_block_N (buf0
, buf1
, tib40
, tib41
, p0
);
/* presumably clears the scratch bytes below position out_len - TODO confirm */
3871 truncate_left (tib40
, tib41
, out_len
);
/* merge the scratch block into the buffer */
3873 buf0
[0] |= tib40
[0];
3874 buf0
[1] |= tib40
[1];
3875 buf0
[2] |= tib40
[2];
3876 buf0
[3] |= tib40
[3];
3877 buf1
[0] |= tib41
[0];
3878 buf1
[1] |= tib41
[1];
3879 buf1
[2] |= tib41
[2];
3880 buf1
[3] |= tib41
[3];
/*
 * Rule op "title": lower-cases every ASCII letter, then clears bit 0x20
 * (upper-cases) on the first character and on letters that follow a space.
 *
 * generate_cmask() returns 0x20 in every byte lane that holds an ASCII letter
 * and 0 elsewhere, so OR-ing it in sets the lower-case bit and AND-ing with
 * its complement clears it; non-letters are never changed.
 *
 * Two implementations are present. The second is guarded by
 * "#if defined IS_AMD || defined IS_GENERIC".
 *
 * NOTE(review): the guard around the first implementation, the loop's switch
 * header, the conditions selecting which of the eight words receives tmp1, and
 * the final return are not visible in this listing - confirm against the full
 * file.
 */
3887 static u32
rule_op_mangle_title (const u32 p0
, const u32 p1
, u32 buf0
[4], u32 buf1
[4], const u32 in_len
)
/* step 1: set the lower-case bit on every letter in all eight words */
3889 buf0
[0] |= (generate_cmask (buf0
[0]));
3890 buf0
[1] |= (generate_cmask (buf0
[1]));
3891 buf0
[2] |= (generate_cmask (buf0
[2]));
3892 buf0
[3] |= (generate_cmask (buf0
[3]));
3893 buf1
[0] |= (generate_cmask (buf1
[0]));
3894 buf1
[1] |= (generate_cmask (buf1
[1]));
3895 buf1
[2] |= (generate_cmask (buf1
[2]));
3896 buf1
[3] |= (generate_cmask (buf1
[3]));
/* first implementation: upper-case byte 0 if it is a letter */
3899 buf0
[0] &= ~(0x00000020 & generate_cmask (buf0
[0]));
/* walk every position i below in_len */
3901 for (u32 i
= 0; i
< in_len
; i
++)
/*
 * Per position: tmp0 is presumably byte i of the buffer (the __byte_perm_S
 * selector 0x654k looks like "byte k of the first operand, zero-extended" -
 * TODO confirm), and tmp1 is an AND-mask that clears bit 0x20 of byte i + 1
 * when that byte is a letter.
 */
3908 case 0: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6540);
3909 tmp1
= ~(0x00002000 & generate_cmask (buf0
[0])); break;
3910 case 1: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6541);
3911 tmp1
= ~(0x00200000 & generate_cmask (buf0
[0])); break;
3912 case 2: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6542);
3913 tmp1
= ~(0x20000000 & generate_cmask (buf0
[0])); break;
3914 case 3: tmp0
= __byte_perm_S (buf0
[0], 0, 0x6543);
3915 tmp1
= ~(0x00000020 & generate_cmask (buf0
[1])); break;
3916 case 4: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6540);
3917 tmp1
= ~(0x00002000 & generate_cmask (buf0
[1])); break;
3918 case 5: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6541);
3919 tmp1
= ~(0x00200000 & generate_cmask (buf0
[1])); break;
3920 case 6: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6542);
3921 tmp1
= ~(0x20000000 & generate_cmask (buf0
[1])); break;
3922 case 7: tmp0
= __byte_perm_S (buf0
[1], 0, 0x6543);
3923 tmp1
= ~(0x00000020 & generate_cmask (buf0
[2])); break;
3924 case 8: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6540);
3925 tmp1
= ~(0x00002000 & generate_cmask (buf0
[2])); break;
3926 case 9: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6541);
3927 tmp1
= ~(0x00200000 & generate_cmask (buf0
[2])); break;
3928 case 10: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6542);
3929 tmp1
= ~(0x20000000 & generate_cmask (buf0
[2])); break;
3930 case 11: tmp0
= __byte_perm_S (buf0
[2], 0, 0x6543);
3931 tmp1
= ~(0x00000020 & generate_cmask (buf0
[3])); break;
3932 case 12: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6540);
3933 tmp1
= ~(0x00002000 & generate_cmask (buf0
[3])); break;
3934 case 13: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6541);
3935 tmp1
= ~(0x00200000 & generate_cmask (buf0
[3])); break;
3936 case 14: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6542);
3937 tmp1
= ~(0x20000000 & generate_cmask (buf0
[3])); break;
3938 case 15: tmp0
= __byte_perm_S (buf0
[3], 0, 0x6543);
3939 tmp1
= ~(0x00000020 & generate_cmask (buf1
[0])); break;
3940 case 16: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6540);
3941 tmp1
= ~(0x00002000 & generate_cmask (buf1
[0])); break;
3942 case 17: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6541);
3943 tmp1
= ~(0x00200000 & generate_cmask (buf1
[0])); break;
3944 case 18: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6542);
3945 tmp1
= ~(0x20000000 & generate_cmask (buf1
[0])); break;
3946 case 19: tmp0
= __byte_perm_S (buf1
[0], 0, 0x6543);
3947 tmp1
= ~(0x00000020 & generate_cmask (buf1
[1])); break;
3948 case 20: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6540);
3949 tmp1
= ~(0x00002000 & generate_cmask (buf1
[1])); break;
3950 case 21: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6541);
3951 tmp1
= ~(0x00200000 & generate_cmask (buf1
[1])); break;
3952 case 22: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6542);
3953 tmp1
= ~(0x20000000 & generate_cmask (buf1
[1])); break;
3954 case 23: tmp0
= __byte_perm_S (buf1
[1], 0, 0x6543);
3955 tmp1
= ~(0x00000020 & generate_cmask (buf1
[2])); break;
3956 case 24: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6540);
3957 tmp1
= ~(0x00002000 & generate_cmask (buf1
[2])); break;
3958 case 25: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6541);
3959 tmp1
= ~(0x00200000 & generate_cmask (buf1
[2])); break;
3960 case 26: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6542);
3961 tmp1
= ~(0x20000000 & generate_cmask (buf1
[2])); break;
3962 case 27: tmp0
= __byte_perm_S (buf1
[2], 0, 0x6543);
3963 tmp1
= ~(0x00000020 & generate_cmask (buf1
[3])); break;
3964 case 28: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6540);
3965 tmp1
= ~(0x00002000 & generate_cmask (buf1
[3])); break;
3966 case 29: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6541);
3967 tmp1
= ~(0x00200000 & generate_cmask (buf1
[3])); break;
3968 case 30: tmp0
= __byte_perm_S (buf1
[3], 0, 0x6542);
3969 tmp1
= ~(0x20000000 & generate_cmask (buf1
[3])); break;
/*
 * When the inspected byte is a space, apply tmp1 to the word holding the next
 * byte. One statement per word; the enclosing conditions that pick the word
 * are not visible here.
 */
3974 if (tmp0
== ' ') buf0
[0] &= tmp1
;
3978 if (tmp0
== ' ') buf0
[1] &= tmp1
;
3982 if (tmp0
== ' ') buf0
[2] &= tmp1
;
3986 if (tmp0
== ' ') buf0
[3] &= tmp1
;
3990 if (tmp0
== ' ') buf1
[0] &= tmp1
;
3994 if (tmp0
== ' ') buf1
[1] &= tmp1
;
3998 if (tmp0
== ' ') buf1
[2] &= tmp1
;
4002 if (tmp0
== ' ') buf1
[3] &= tmp1
;
/* second implementation: loop-free, built on per-byte vector compares */
4007 #if defined IS_AMD || defined IS_GENERIC
/* tmp0 = four spaces to compare against; tmp1 / tmp2 = the per-byte "no" / "yes" results */
4011 const uchar4 tmp0
= (uchar4
) (' ');
4012 const uchar4 tmp1
= (uchar4
) (0x00);
4013 const uchar4 tmp2
= (uchar4
) (0xff);
/* tib40/tib41 become a byte mask: 0xff where the buffer holds a space, 0x00 elsewhere */
4017 tmp
= as_uchar4 (buf0
[0]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[0] = as_uint (tmp
);
4018 tmp
= as_uchar4 (buf0
[1]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[1] = as_uint (tmp
);
4019 tmp
= as_uchar4 (buf0
[2]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[2] = as_uint (tmp
);
4020 tmp
= as_uchar4 (buf0
[3]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib40
[3] = as_uint (tmp
);
4021 tmp
= as_uchar4 (buf1
[0]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[0] = as_uint (tmp
);
4022 tmp
= as_uchar4 (buf1
[1]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[1] = as_uint (tmp
);
4023 tmp
= as_uchar4 (buf1
[2]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[2] = as_uint (tmp
);
4024 tmp
= as_uchar4 (buf1
[3]); tmp
= select (tmp1
, tmp2
, tmp
== tmp0
); tib41
[3] = as_uint (tmp
);
/*
 * Shift the mask in place (presumably by one byte, so each 0xff lines up with
 * the character after the space - TODO confirm), then force byte 0 into the
 * mask so the first character is covered as well.
 */
4026 rshift_block (tib40
, tib41
, tib40
, tib41
); tib40
[0] |= 0xff;
/* clear the lower-case bit on every letter that sits under the mask */
4028 buf0
[0] &= ~(generate_cmask (buf0
[0]) & tib40
[0]);
4029 buf0
[1] &= ~(generate_cmask (buf0
[1]) & tib40
[1]);
4030 buf0
[2] &= ~(generate_cmask (buf0
[2]) & tib40
[2]);
4031 buf0
[3] &= ~(generate_cmask (buf0
[3]) & tib40
[3]);
4032 buf1
[0] &= ~(generate_cmask (buf1
[0]) & tib41
[0]);
4033 buf1
[1] &= ~(generate_cmask (buf1
[1]) & tib41
[1]);
4034 buf1
[2] &= ~(generate_cmask (buf1
[2]) & tib41
[2]);
4035 buf1
[3] &= ~(generate_cmask (buf1
[3]) & tib41
[3]);
/*
 * Dispatch a single rule operation to its rule_op_mangle_* handler.
 *
 * name    operation code, one of the RULE_OP_MANGLE_* constants
 * p0, p1  operation parameters, forwarded unchanged to the handler
 * buf0    words 0..3 of the buffer, modified in place by the handler
 * buf1    words 4..7 of the buffer, modified in place by the handler
 * in_len  current length in bytes
 *
 * Returns the length reported by the handler. An operation code with no case
 * below leaves the buffer untouched and returns in_len.
 *
 * NOTE(review): the switch header and the trailing return were absent from the
 * reviewed listing and are restored here from the visible case labels and the
 * out_len assignments - confirm against upstream.
 */
u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  u32 out_len = in_len;

  switch (name)
  {
    case RULE_OP_MANGLE_LREST:            out_len = rule_op_mangle_lrest            (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_UREST:            out_len = rule_op_mangle_urest            (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_LREST_UFIRST:     out_len = rule_op_mangle_lrest_ufirst     (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_UREST_LFIRST:     out_len = rule_op_mangle_urest_lfirst     (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_TREST:            out_len = rule_op_mangle_trest            (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_TOGGLE_AT:        out_len = rule_op_mangle_toggle_at        (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_REVERSE:          out_len = rule_op_mangle_reverse          (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DUPEWORD:         out_len = rule_op_mangle_dupeword         (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DUPEWORD_TIMES:   out_len = rule_op_mangle_dupeword_times   (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_REFLECT:          out_len = rule_op_mangle_reflect          (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_APPEND:           out_len = rule_op_mangle_append           (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_PREPEND:          out_len = rule_op_mangle_prepend          (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_ROTATE_LEFT:      out_len = rule_op_mangle_rotate_left      (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_ROTATE_RIGHT:     out_len = rule_op_mangle_rotate_right     (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DELETE_FIRST:     out_len = rule_op_mangle_delete_first     (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DELETE_LAST:      out_len = rule_op_mangle_delete_last      (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DELETE_AT:        out_len = rule_op_mangle_delete_at        (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_EXTRACT:          out_len = rule_op_mangle_extract          (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_OMIT:             out_len = rule_op_mangle_omit             (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_INSERT:           out_len = rule_op_mangle_insert           (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_OVERSTRIKE:       out_len = rule_op_mangle_overstrike       (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_TRUNCATE_AT:      out_len = rule_op_mangle_truncate_at      (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_REPLACE:          out_len = rule_op_mangle_replace          (p0, p1, buf0, buf1, out_len); break;
    //case RULE_OP_MANGLE_PURGECHAR:        out_len = rule_op_mangle_purgechar        (p0, p1, buf0, buf1, out_len); break;
    //case RULE_OP_MANGLE_TOGGLECASE_REC:   out_len = rule_op_mangle_togglecase_rec   (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DUPECHAR_FIRST:   out_len = rule_op_mangle_dupechar_first   (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DUPECHAR_LAST:    out_len = rule_op_mangle_dupechar_last    (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DUPECHAR_ALL:     out_len = rule_op_mangle_dupechar_all     (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_SWITCH_FIRST:     out_len = rule_op_mangle_switch_first     (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_SWITCH_LAST:      out_len = rule_op_mangle_switch_last      (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_SWITCH_AT:        out_len = rule_op_mangle_switch_at        (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_CHR_SHIFTL:       out_len = rule_op_mangle_chr_shiftl       (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_CHR_SHIFTR:       out_len = rule_op_mangle_chr_shiftr       (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_CHR_INCR:         out_len = rule_op_mangle_chr_incr         (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_CHR_DECR:         out_len = rule_op_mangle_chr_decr         (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_REPLACE_NP1:      out_len = rule_op_mangle_replace_np1      (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_REPLACE_NM1:      out_len = rule_op_mangle_replace_nm1      (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DUPEBLOCK_FIRST:  out_len = rule_op_mangle_dupeblock_first  (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_DUPEBLOCK_LAST:   out_len = rule_op_mangle_dupeblock_last   (p0, p1, buf0, buf1, out_len); break;
    case RULE_OP_MANGLE_TITLE:            out_len = rule_op_mangle_title            (p0, p1, buf0, buf1, out_len); break;
  }

  return (out_len);
}
/*
 * Apply a zero-terminated list of packed rule commands to one buffer.
 *
 * cmds  command list; each entry packs the operation code in bits 0..7,
 *       p0 in bits 8..15 and p1 in bits 16..23; an entry equal to 0 ends the list
 * buf0  words 0..3 of the buffer, modified in place
 * buf1  words 4..7 of the buffer, modified in place
 * len   length in bytes before any rule is applied
 *
 * Returns the length after the last command; each command receives the length
 * produced by the previous one.
 *
 * NOTE(review): the out_len initialisation and the trailing return were absent
 * from the reviewed listing; out_len is seeded from len (the only length
 * input) and returned - confirm against upstream.
 */
u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
{
  u32 out_len = len;

  for (u32 i = 0; cmds[i] != 0; i++)
  {
    const u32 cmd = cmds[i];

    // unpack the three 8-bit fields of the command word
    const u32 name = (cmd >>  0) & 0xff;
    const u32 p0   = (cmd >>  8) & 0xff;
    const u32 p1   = (cmd >> 16) & 0xff;

    out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
  }

  return (out_len);
}
4110 u32x
apply_rules_vect (const u32 pw_buf0
[4], const u32 pw_buf1
[4], const u32 pw_len
, const __global kernel_rule_t
*rules_buf
, const u32 il_pos
, u32x w0
[4], u32x w1
[4])
4123 return apply_rules (rules_buf
[il_pos
].cmds
, w0
, w1
, pw_len
);
4130 for (int i
= 0; i
< VECT_SIZE
; i
++)
4135 tmp0
[0] = pw_buf0
[0];
4136 tmp0
[1] = pw_buf0
[1];
4137 tmp0
[2] = pw_buf0
[2];
4138 tmp0
[3] = pw_buf0
[3];
4139 tmp1
[0] = pw_buf1
[0];
4140 tmp1
[1] = pw_buf1
[1];
4141 tmp1
[2] = pw_buf1
[2];
4142 tmp1
[3] = pw_buf1
[3];
4144 const u32 tmp_len
= apply_rules (rules_buf
[il_pos
+ i
].cmds
, tmp0
, tmp1
, pw_len
);
4158 out_len
.s0
= tmp_len
;
4170 out_len
.s1
= tmp_len
;
4184 out_len
.s2
= tmp_len
;
4196 out_len
.s3
= tmp_len
;
4210 out_len
.s4
= tmp_len
;
4222 out_len
.s5
= tmp_len
;
4234 out_len
.s6
= tmp_len
;
4246 out_len
.s7
= tmp_len
;
4260 out_len
.s8
= tmp_len
;
4272 out_len
.s9
= tmp_len
;
4284 out_len
.sa
= tmp_len
;
4296 out_len
.sb
= tmp_len
;
4308 out_len
.sc
= tmp_len
;
4320 out_len
.sd
= tmp_len
;
4332 out_len
.se
= tmp_len
;
4344 out_len
.sf
= tmp_len
;