Merge pull request #141 from philsmd/master
[hashcat.git] / OpenCL / rp.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 static u32 generate_cmask (u32 buf)
7 {
8 const u32 rmask = ((buf & 0x40404040) >> 1)
9 & ~((buf & 0x80808080) >> 2);
10
11 const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
12 const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
13
14 return rmask & ~hmask & lmask;
15 }
16
17 static void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
18 {
19 const u32 tmp = (1 << ((len % 4) * 8)) - 1;
20
21 switch (len / 4)
22 {
23 case 0: w0[0] &= tmp;
24 w0[1] = 0;
25 w0[2] = 0;
26 w0[3] = 0;
27 w1[0] = 0;
28 w1[1] = 0;
29 w1[2] = 0;
30 w1[3] = 0;
31 break;
32 case 1: w0[1] &= tmp;
33 w0[2] = 0;
34 w0[3] = 0;
35 w1[0] = 0;
36 w1[1] = 0;
37 w1[2] = 0;
38 w1[3] = 0;
39 break;
40 case 2: w0[2] &= tmp;
41 w0[3] = 0;
42 w1[0] = 0;
43 w1[1] = 0;
44 w1[2] = 0;
45 w1[3] = 0;
46 break;
47 case 3: w0[3] &= tmp;
48 w1[0] = 0;
49 w1[1] = 0;
50 w1[2] = 0;
51 w1[3] = 0;
52 break;
53 case 4: w1[0] &= tmp;
54 w1[1] = 0;
55 w1[2] = 0;
56 w1[3] = 0;
57 break;
58 case 5: w1[1] &= tmp;
59 w1[2] = 0;
60 w1[3] = 0;
61 break;
62 case 6: w1[2] &= tmp;
63 w1[3] = 0;
64 break;
65 case 7: w1[3] &= tmp;
66 break;
67 }
68 }
69
70 static void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
71 {
72 const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
73
74 switch (len / 4)
75 {
76 case 0: w0[0] &= tmp;
77 break;
78 case 1: w0[0] = 0;
79 w0[1] &= tmp;
80 break;
81 case 2: w0[0] = 0;
82 w0[1] = 0;
83 w0[2] &= tmp;
84 break;
85 case 3: w0[0] = 0;
86 w0[1] = 0;
87 w0[2] = 0;
88 w0[3] &= tmp;
89 break;
90 case 4: w0[0] = 0;
91 w0[1] = 0;
92 w0[2] = 0;
93 w0[3] = 0;
94 w1[0] &= tmp;
95 break;
96 case 5: w0[0] = 0;
97 w0[1] = 0;
98 w0[2] = 0;
99 w0[3] = 0;
100 w1[0] = 0;
101 w1[1] &= tmp;
102 break;
103 case 6: w0[0] = 0;
104 w0[1] = 0;
105 w0[2] = 0;
106 w0[3] = 0;
107 w1[0] = 0;
108 w1[1] = 0;
109 w1[2] &= tmp;
110 break;
111 case 7: w0[0] = 0;
112 w0[1] = 0;
113 w0[2] = 0;
114 w0[3] = 0;
115 w1[0] = 0;
116 w1[1] = 0;
117 w1[2] = 0;
118 w1[3] &= tmp;
119 break;
120 }
121 }
122
123 static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
124 {
125 #ifdef IS_NV
126 out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
127 out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
128 out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
129 out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
130 out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
131 out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
132 out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
133 out1[3] = __byte_perm (in1[3], 0, 0x4321);
134 #endif
135
136 #if defined IS_AMD || defined IS_GENERIC
137 out0[0] = amd_bytealign (in0[1], in0[0], 1);
138 out0[1] = amd_bytealign (in0[2], in0[1], 1);
139 out0[2] = amd_bytealign (in0[3], in0[2], 1);
140 out0[3] = amd_bytealign (in1[0], in0[3], 1);
141 out1[0] = amd_bytealign (in1[1], in1[0], 1);
142 out1[1] = amd_bytealign (in1[2], in1[1], 1);
143 out1[2] = amd_bytealign (in1[3], in1[2], 1);
144 out1[3] = amd_bytealign ( 0, in1[3], 1);
145 #endif
146 }
147
148 static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
149 {
150 #ifdef IS_NV
151 out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
152 out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
153 out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
154 out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
155 out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
156 out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
157 out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
158 out0[0] = __byte_perm ( 0, in0[0], 0x6543);
159 #endif
160
161 #if defined IS_AMD || defined IS_GENERIC
162 out1[3] = amd_bytealign (in1[3], in1[2], 3);
163 out1[2] = amd_bytealign (in1[2], in1[1], 3);
164 out1[1] = amd_bytealign (in1[1], in1[0], 3);
165 out1[0] = amd_bytealign (in1[0], in0[3], 3);
166 out0[3] = amd_bytealign (in0[3], in0[2], 3);
167 out0[2] = amd_bytealign (in0[2], in0[1], 3);
168 out0[1] = amd_bytealign (in0[1], in0[0], 3);
169 out0[0] = amd_bytealign (in0[0], 0, 3);
170 #endif
171 }
172
173 static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
174 {
175 #ifdef IS_NV
176 switch (num)
177 {
178 case 0: out0[0] = in0[0];
179 out0[1] = in0[1];
180 out0[2] = in0[2];
181 out0[3] = in0[3];
182 out1[0] = in1[0];
183 out1[1] = in1[1];
184 out1[2] = in1[2];
185 out1[3] = in1[3];
186 break;
187 case 1: out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
188 out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
189 out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
190 out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
191 out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
192 out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
193 out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
194 out1[3] = __byte_perm (in1[3], 0, 0x4321);
195 break;
196 case 2: out0[0] = __byte_perm (in0[0], in0[1], 0x5432);
197 out0[1] = __byte_perm (in0[1], in0[2], 0x5432);
198 out0[2] = __byte_perm (in0[2], in0[3], 0x5432);
199 out0[3] = __byte_perm (in0[3], in1[0], 0x5432);
200 out1[0] = __byte_perm (in1[0], in1[1], 0x5432);
201 out1[1] = __byte_perm (in1[1], in1[2], 0x5432);
202 out1[2] = __byte_perm (in1[2], in1[3], 0x5432);
203 out1[3] = __byte_perm (in1[3], 0, 0x5432);
204 break;
205 case 3: out0[0] = __byte_perm (in0[0], in0[1], 0x6543);
206 out0[1] = __byte_perm (in0[1], in0[2], 0x6543);
207 out0[2] = __byte_perm (in0[2], in0[3], 0x6543);
208 out0[3] = __byte_perm (in0[3], in1[0], 0x6543);
209 out1[0] = __byte_perm (in1[0], in1[1], 0x6543);
210 out1[1] = __byte_perm (in1[1], in1[2], 0x6543);
211 out1[2] = __byte_perm (in1[2], in1[3], 0x6543);
212 out1[3] = __byte_perm (in1[3], 0, 0x6543);
213 break;
214 case 4: out0[0] = in0[1];
215 out0[1] = in0[2];
216 out0[2] = in0[3];
217 out0[3] = in1[0];
218 out1[0] = in1[1];
219 out1[1] = in1[2];
220 out1[2] = in1[3];
221 out1[3] = 0;
222 break;
223 case 5: out0[0] = __byte_perm (in0[1], in0[2], 0x4321);
224 out0[1] = __byte_perm (in0[2], in0[3], 0x4321);
225 out0[2] = __byte_perm (in0[3], in1[0], 0x4321);
226 out0[3] = __byte_perm (in1[0], in1[1], 0x4321);
227 out1[0] = __byte_perm (in1[1], in1[2], 0x4321);
228 out1[1] = __byte_perm (in1[2], in1[3], 0x4321);
229 out1[2] = __byte_perm (in1[3], 0, 0x4321);
230 out1[3] = 0;
231 break;
232 case 6: out0[0] = __byte_perm (in0[1], in0[2], 0x5432);
233 out0[1] = __byte_perm (in0[2], in0[3], 0x5432);
234 out0[2] = __byte_perm (in0[3], in1[0], 0x5432);
235 out0[3] = __byte_perm (in1[0], in1[1], 0x5432);
236 out1[0] = __byte_perm (in1[1], in1[2], 0x5432);
237 out1[1] = __byte_perm (in1[2], in1[3], 0x5432);
238 out1[2] = __byte_perm (in1[3], 0, 0x5432);
239 out1[3] = 0;
240 break;
241 case 7: out0[0] = __byte_perm (in0[1], in0[2], 0x6543);
242 out0[1] = __byte_perm (in0[2], in0[3], 0x6543);
243 out0[2] = __byte_perm (in0[3], in1[0], 0x6543);
244 out0[3] = __byte_perm (in1[0], in1[1], 0x6543);
245 out1[0] = __byte_perm (in1[1], in1[2], 0x6543);
246 out1[1] = __byte_perm (in1[2], in1[3], 0x6543);
247 out1[2] = __byte_perm (in1[3], 0, 0x6543);
248 out1[3] = 0;
249 break;
250 case 8: out0[0] = in0[2];
251 out0[1] = in0[3];
252 out0[2] = in1[0];
253 out0[3] = in1[1];
254 out1[0] = in1[2];
255 out1[1] = in1[3];
256 out1[2] = 0;
257 out1[3] = 0;
258 break;
259 case 9: out0[0] = __byte_perm (in0[2], in0[3], 0x4321);
260 out0[1] = __byte_perm (in0[3], in1[0], 0x4321);
261 out0[2] = __byte_perm (in1[0], in1[1], 0x4321);
262 out0[3] = __byte_perm (in1[1], in1[2], 0x4321);
263 out1[0] = __byte_perm (in1[2], in1[3], 0x4321);
264 out1[1] = __byte_perm (in1[3], 0, 0x4321);
265 out1[2] = 0;
266 out1[3] = 0;
267 break;
268 case 10: out0[0] = __byte_perm (in0[2], in0[3], 0x5432);
269 out0[1] = __byte_perm (in0[3], in1[0], 0x5432);
270 out0[2] = __byte_perm (in1[0], in1[1], 0x5432);
271 out0[3] = __byte_perm (in1[1], in1[2], 0x5432);
272 out1[0] = __byte_perm (in1[2], in1[3], 0x5432);
273 out1[1] = __byte_perm (in1[3], 0, 0x5432);
274 out1[2] = 0;
275 out1[3] = 0;
276 break;
277 case 11: out0[0] = __byte_perm (in0[2], in0[3], 0x6543);
278 out0[1] = __byte_perm (in0[3], in1[0], 0x6543);
279 out0[2] = __byte_perm (in1[0], in1[1], 0x6543);
280 out0[3] = __byte_perm (in1[1], in1[2], 0x6543);
281 out1[0] = __byte_perm (in1[2], in1[3], 0x6543);
282 out1[1] = __byte_perm (in1[3], 0, 0x6543);
283 out1[2] = 0;
284 out1[3] = 0;
285 break;
286 case 12: out0[0] = in0[3];
287 out0[1] = in1[0];
288 out0[2] = in1[1];
289 out0[3] = in1[2];
290 out1[0] = in1[3];
291 out1[1] = 0;
292 out1[2] = 0;
293 out1[3] = 0;
294 break;
295 case 13:
296 out0[0] = __byte_perm (in0[3], in1[0], 0x4321);
297 out0[1] = __byte_perm (in1[0], in1[1], 0x4321);
298 out0[2] = __byte_perm (in1[1], in1[2], 0x4321);
299 out0[3] = __byte_perm (in1[2], in1[3], 0x4321);
300 out1[0] = __byte_perm (in1[3], 0, 0x4321);
301 out1[1] = 0;
302 out1[2] = 0;
303 out1[3] = 0;
304 break;
305 case 14: out0[0] = __byte_perm (in0[3], in1[0], 0x5432);
306 out0[1] = __byte_perm (in1[0], in1[1], 0x5432);
307 out0[2] = __byte_perm (in1[1], in1[2], 0x5432);
308 out0[3] = __byte_perm (in1[2], in1[3], 0x5432);
309 out1[0] = __byte_perm (in1[3], 0, 0x5432);
310 out1[1] = 0;
311 out1[2] = 0;
312 out1[3] = 0;
313 break;
314 case 15: out0[0] = __byte_perm (in0[3], in1[0], 0x6543);
315 out0[1] = __byte_perm (in1[0], in1[1], 0x6543);
316 out0[2] = __byte_perm (in1[1], in1[2], 0x6543);
317 out0[3] = __byte_perm (in1[2], in1[3], 0x6543);
318 out1[0] = __byte_perm (in1[3], 0, 0x6543);
319 out1[1] = 0;
320 out1[2] = 0;
321 out1[3] = 0;
322 break;
323 case 16: out0[0] = in1[0];
324 out0[1] = in1[1];
325 out0[2] = in1[2];
326 out0[3] = in1[3];
327 out1[0] = 0;
328 out1[1] = 0;
329 out1[2] = 0;
330 out1[3] = 0;
331 break;
332 case 17: out0[0] = __byte_perm (in1[0], in1[1], 0x4321);
333 out0[1] = __byte_perm (in1[1], in1[2], 0x4321);
334 out0[2] = __byte_perm (in1[2], in1[3], 0x4321);
335 out0[3] = __byte_perm (in1[3], 0, 0x4321);
336 out1[0] = 0;
337 out1[1] = 0;
338 out1[2] = 0;
339 out1[3] = 0;
340 break;
341 case 18: out0[0] = __byte_perm (in1[0], in1[1], 0x5432);
342 out0[1] = __byte_perm (in1[1], in1[2], 0x5432);
343 out0[2] = __byte_perm (in1[2], in1[3], 0x5432);
344 out0[3] = __byte_perm (in1[3], 0, 0x5432);
345 out1[0] = 0;
346 out1[1] = 0;
347 out1[2] = 0;
348 out1[3] = 0;
349 break;
350 case 19: out0[0] = __byte_perm (in1[0], in1[1], 0x6543);
351 out0[1] = __byte_perm (in1[1], in1[2], 0x6543);
352 out0[2] = __byte_perm (in1[2], in1[3], 0x6543);
353 out0[3] = __byte_perm (in1[3], 0, 0x6543);
354 out1[0] = 0;
355 out1[1] = 0;
356 out1[2] = 0;
357 out1[3] = 0;
358 break;
359 case 20: out0[0] = in1[1];
360 out0[1] = in1[2];
361 out0[2] = in1[3];
362 out0[3] = 0;
363 out1[0] = 0;
364 out1[1] = 0;
365 out1[2] = 0;
366 out1[3] = 0;
367 break;
368 case 21: out0[0] = __byte_perm (in1[1], in1[2], 0x4321);
369 out0[1] = __byte_perm (in1[2], in1[3], 0x4321);
370 out0[2] = __byte_perm (in1[3], 0, 0x4321);
371 out0[3] = 0;
372 out1[0] = 0;
373 out1[1] = 0;
374 out1[2] = 0;
375 out1[3] = 0;
376 break;
377 case 22: out0[0] = __byte_perm (in1[1], in1[2], 0x5432);
378 out0[1] = __byte_perm (in1[2], in1[3], 0x5432);
379 out0[2] = __byte_perm (in1[3], 0, 0x5432);
380 out0[3] = 0;
381 out1[0] = 0;
382 out1[1] = 0;
383 out1[2] = 0;
384 out1[3] = 0;
385 break;
386 case 23: out0[0] = __byte_perm (in1[1], in1[2], 0x6543);
387 out0[1] = __byte_perm (in1[2], in1[3], 0x6543);
388 out0[2] = __byte_perm (in1[3], 0, 0x6543);
389 out0[3] = 0;
390 out1[0] = 0;
391 out1[1] = 0;
392 out1[2] = 0;
393 out1[3] = 0;
394 break;
395 case 24: out0[0] = in1[2];
396 out0[1] = in1[3];
397 out0[2] = 0;
398 out0[3] = 0;
399 out1[0] = 0;
400 out1[1] = 0;
401 out1[2] = 0;
402 out1[3] = 0;
403 break;
404 case 25: out0[0] = __byte_perm (in1[2], in1[3], 0x4321);
405 out0[1] = __byte_perm (in1[3], 0, 0x4321);
406 out0[2] = 0;
407 out0[3] = 0;
408 out1[0] = 0;
409 out1[1] = 0;
410 out1[2] = 0;
411 out1[3] = 0;
412 break;
413 case 26: out0[0] = __byte_perm (in1[2], in1[3], 0x5432);
414 out0[1] = __byte_perm (in1[3], 0, 0x5432);
415 out0[2] = 0;
416 out0[3] = 0;
417 out1[0] = 0;
418 out1[1] = 0;
419 out1[2] = 0;
420 out1[3] = 0;
421 break;
422 case 27: out0[0] = __byte_perm (in1[2], in1[3], 0x6543);
423 out0[1] = __byte_perm (in1[3], 0, 0x6543);
424 out0[2] = 0;
425 out0[3] = 0;
426 out1[0] = 0;
427 out1[1] = 0;
428 out1[2] = 0;
429 out1[3] = 0;
430 break;
431 case 28: out0[0] = in1[3];
432 out0[1] = 0;
433 out0[2] = 0;
434 out0[3] = 0;
435 out1[0] = 0;
436 out1[1] = 0;
437 out1[2] = 0;
438 out1[3] = 0;
439 break;
440 case 29: out0[0] = __byte_perm (in1[3], 0, 0x4321);
441 out0[1] = 0;
442 out0[2] = 0;
443 out0[3] = 0;
444 out1[0] = 0;
445 out1[1] = 0;
446 out1[2] = 0;
447 out1[3] = 0;
448 break;
449 case 30: out0[0] = __byte_perm (in1[3], 0, 0x5432);
450 out0[1] = 0;
451 out0[2] = 0;
452 out0[3] = 0;
453 out1[0] = 0;
454 out1[1] = 0;
455 out1[2] = 0;
456 out1[3] = 0;
457 break;
458 case 31: out0[0] = __byte_perm (in1[3], 0, 0x6543);
459 out0[1] = 0;
460 out0[2] = 0;
461 out0[3] = 0;
462 out1[0] = 0;
463 out1[1] = 0;
464 out1[2] = 0;
465 out1[3] = 0;
466 break;
467 }
468 #endif
469
470 #if defined IS_AMD || defined IS_GENERIC
471 switch (num)
472 {
473 case 0: out0[0] = in0[0];
474 out0[1] = in0[1];
475 out0[2] = in0[2];
476 out0[3] = in0[3];
477 out1[0] = in1[0];
478 out1[1] = in1[1];
479 out1[2] = in1[2];
480 out1[3] = in1[3];
481 break;
482 case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1);
483 out0[1] = amd_bytealign (in0[2], in0[1], 1);
484 out0[2] = amd_bytealign (in0[3], in0[2], 1);
485 out0[3] = amd_bytealign (in1[0], in0[3], 1);
486 out1[0] = amd_bytealign (in1[1], in1[0], 1);
487 out1[1] = amd_bytealign (in1[2], in1[1], 1);
488 out1[2] = amd_bytealign (in1[3], in1[2], 1);
489 out1[3] = amd_bytealign ( 0, in1[3], 1);
490 break;
491 case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2);
492 out0[1] = amd_bytealign (in0[2], in0[1], 2);
493 out0[2] = amd_bytealign (in0[3], in0[2], 2);
494 out0[3] = amd_bytealign (in1[0], in0[3], 2);
495 out1[0] = amd_bytealign (in1[1], in1[0], 2);
496 out1[1] = amd_bytealign (in1[2], in1[1], 2);
497 out1[2] = amd_bytealign (in1[3], in1[2], 2);
498 out1[3] = amd_bytealign ( 0, in1[3], 2);
499 break;
500 case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3);
501 out0[1] = amd_bytealign (in0[2], in0[1], 3);
502 out0[2] = amd_bytealign (in0[3], in0[2], 3);
503 out0[3] = amd_bytealign (in1[0], in0[3], 3);
504 out1[0] = amd_bytealign (in1[1], in1[0], 3);
505 out1[1] = amd_bytealign (in1[2], in1[1], 3);
506 out1[2] = amd_bytealign (in1[3], in1[2], 3);
507 out1[3] = amd_bytealign ( 0, in1[3], 3);
508 break;
509 case 4: out0[0] = in0[1];
510 out0[1] = in0[2];
511 out0[2] = in0[3];
512 out0[3] = in1[0];
513 out1[0] = in1[1];
514 out1[1] = in1[2];
515 out1[2] = in1[3];
516 out1[3] = 0;
517 break;
518 case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1);
519 out0[1] = amd_bytealign (in0[3], in0[2], 1);
520 out0[2] = amd_bytealign (in1[0], in0[3], 1);
521 out0[3] = amd_bytealign (in1[1], in1[0], 1);
522 out1[0] = amd_bytealign (in1[2], in1[1], 1);
523 out1[1] = amd_bytealign (in1[3], in1[2], 1);
524 out1[2] = amd_bytealign ( 0, in1[3], 1);
525 out1[3] = 0;
526 break;
527 case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2);
528 out0[1] = amd_bytealign (in0[3], in0[2], 2);
529 out0[2] = amd_bytealign (in1[0], in0[3], 2);
530 out0[3] = amd_bytealign (in1[1], in1[0], 2);
531 out1[0] = amd_bytealign (in1[2], in1[1], 2);
532 out1[1] = amd_bytealign (in1[3], in1[2], 2);
533 out1[2] = amd_bytealign ( 0, in1[3], 2);
534 out1[3] = 0;
535 break;
536 case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3);
537 out0[1] = amd_bytealign (in0[3], in0[2], 3);
538 out0[2] = amd_bytealign (in1[0], in0[3], 3);
539 out0[3] = amd_bytealign (in1[1], in1[0], 3);
540 out1[0] = amd_bytealign (in1[2], in1[1], 3);
541 out1[1] = amd_bytealign (in1[3], in1[2], 3);
542 out1[2] = amd_bytealign ( 0, in1[3], 3);
543 out1[3] = 0;
544 break;
545 case 8: out0[0] = in0[2];
546 out0[1] = in0[3];
547 out0[2] = in1[0];
548 out0[3] = in1[1];
549 out1[0] = in1[2];
550 out1[1] = in1[3];
551 out1[2] = 0;
552 out1[3] = 0;
553 break;
554 case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1);
555 out0[1] = amd_bytealign (in1[0], in0[3], 1);
556 out0[2] = amd_bytealign (in1[1], in1[0], 1);
557 out0[3] = amd_bytealign (in1[2], in1[1], 1);
558 out1[0] = amd_bytealign (in1[3], in1[2], 1);
559 out1[1] = amd_bytealign ( 0, in1[3], 1);
560 out1[2] = 0;
561 out1[3] = 0;
562 break;
563 case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2);
564 out0[1] = amd_bytealign (in1[0], in0[3], 2);
565 out0[2] = amd_bytealign (in1[1], in1[0], 2);
566 out0[3] = amd_bytealign (in1[2], in1[1], 2);
567 out1[0] = amd_bytealign (in1[3], in1[2], 2);
568 out1[1] = amd_bytealign ( 0, in1[3], 2);
569 out1[2] = 0;
570 out1[3] = 0;
571 break;
572 case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3);
573 out0[1] = amd_bytealign (in1[0], in0[3], 3);
574 out0[2] = amd_bytealign (in1[1], in1[0], 3);
575 out0[3] = amd_bytealign (in1[2], in1[1], 3);
576 out1[0] = amd_bytealign (in1[3], in1[2], 3);
577 out1[1] = amd_bytealign ( 0, in1[3], 3);
578 out1[2] = 0;
579 out1[3] = 0;
580 break;
581 case 12: out0[0] = in0[3];
582 out0[1] = in1[0];
583 out0[2] = in1[1];
584 out0[3] = in1[2];
585 out1[0] = in1[3];
586 out1[1] = 0;
587 out1[2] = 0;
588 out1[3] = 0;
589 break;
590 case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1);
591 out0[1] = amd_bytealign (in1[1], in1[0], 1);
592 out0[2] = amd_bytealign (in1[2], in1[1], 1);
593 out0[3] = amd_bytealign (in1[3], in1[2], 1);
594 out1[0] = amd_bytealign ( 0, in1[3], 1);
595 out1[1] = 0;
596 out1[2] = 0;
597 out1[3] = 0;
598 break;
599 case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2);
600 out0[1] = amd_bytealign (in1[1], in1[0], 2);
601 out0[2] = amd_bytealign (in1[2], in1[1], 2);
602 out0[3] = amd_bytealign (in1[3], in1[2], 2);
603 out1[0] = amd_bytealign ( 0, in1[3], 2);
604 out1[1] = 0;
605 out1[2] = 0;
606 out1[3] = 0;
607 break;
608 case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3);
609 out0[1] = amd_bytealign (in1[1], in1[0], 3);
610 out0[2] = amd_bytealign (in1[2], in1[1], 3);
611 out0[3] = amd_bytealign (in1[3], in1[2], 3);
612 out1[0] = amd_bytealign ( 0, in1[3], 3);
613 out1[1] = 0;
614 out1[2] = 0;
615 out1[3] = 0;
616 break;
617 case 16: out0[0] = in1[0];
618 out0[1] = in1[1];
619 out0[2] = in1[2];
620 out0[3] = in1[3];
621 out1[0] = 0;
622 out1[1] = 0;
623 out1[2] = 0;
624 out1[3] = 0;
625 break;
626 case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1);
627 out0[1] = amd_bytealign (in1[2], in1[1], 1);
628 out0[2] = amd_bytealign (in1[3], in1[2], 1);
629 out0[3] = amd_bytealign ( 0, in1[3], 1);
630 out1[0] = 0;
631 out1[1] = 0;
632 out1[2] = 0;
633 out1[3] = 0;
634 break;
635 case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2);
636 out0[1] = amd_bytealign (in1[2], in1[1], 2);
637 out0[2] = amd_bytealign (in1[3], in1[2], 2);
638 out0[3] = amd_bytealign ( 0, in1[3], 2);
639 out1[0] = 0;
640 out1[1] = 0;
641 out1[2] = 0;
642 out1[3] = 0;
643 break;
644 case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3);
645 out0[1] = amd_bytealign (in1[2], in1[1], 3);
646 out0[2] = amd_bytealign (in1[3], in1[2], 3);
647 out0[3] = amd_bytealign ( 0, in1[3], 3);
648 out1[0] = 0;
649 out1[1] = 0;
650 out1[2] = 0;
651 out1[3] = 0;
652 break;
653 case 20: out0[0] = in1[1];
654 out0[1] = in1[2];
655 out0[2] = in1[3];
656 out0[3] = 0;
657 out1[0] = 0;
658 out1[1] = 0;
659 out1[2] = 0;
660 out1[3] = 0;
661 break;
662 case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1);
663 out0[1] = amd_bytealign (in1[3], in1[2], 1);
664 out0[2] = amd_bytealign ( 0, in1[3], 1);
665 out0[3] = 0;
666 out1[0] = 0;
667 out1[1] = 0;
668 out1[2] = 0;
669 out1[3] = 0;
670 break;
671 case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2);
672 out0[1] = amd_bytealign (in1[3], in1[2], 2);
673 out0[2] = amd_bytealign ( 0, in1[3], 2);
674 out0[3] = 0;
675 out1[0] = 0;
676 out1[1] = 0;
677 out1[2] = 0;
678 out1[3] = 0;
679 break;
680 case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3);
681 out0[1] = amd_bytealign (in1[3], in1[2], 3);
682 out0[2] = amd_bytealign ( 0, in1[3], 3);
683 out0[3] = 0;
684 out1[0] = 0;
685 out1[1] = 0;
686 out1[2] = 0;
687 out1[3] = 0;
688 break;
689 case 24: out0[0] = in1[2];
690 out0[1] = in1[3];
691 out0[2] = 0;
692 out0[3] = 0;
693 out1[0] = 0;
694 out1[1] = 0;
695 out1[2] = 0;
696 out1[3] = 0;
697 break;
698 case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1);
699 out0[1] = amd_bytealign ( 0, in1[3], 1);
700 out0[2] = 0;
701 out0[3] = 0;
702 out1[0] = 0;
703 out1[1] = 0;
704 out1[2] = 0;
705 out1[3] = 0;
706 break;
707 case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2);
708 out0[1] = amd_bytealign ( 0, in1[3], 2);
709 out0[2] = 0;
710 out0[3] = 0;
711 out1[0] = 0;
712 out1[1] = 0;
713 out1[2] = 0;
714 out1[3] = 0;
715 break;
716 case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3);
717 out0[1] = amd_bytealign ( 0, in1[3], 3);
718 out0[2] = 0;
719 out0[3] = 0;
720 out1[0] = 0;
721 out1[1] = 0;
722 out1[2] = 0;
723 out1[3] = 0;
724 break;
725 case 28: out0[0] = in1[3];
726 out0[1] = 0;
727 out0[2] = 0;
728 out0[3] = 0;
729 out1[0] = 0;
730 out1[1] = 0;
731 out1[2] = 0;
732 out1[3] = 0;
733 break;
734 case 29: out0[0] = amd_bytealign ( 0, in1[3], 1);
735 out0[1] = 0;
736 out0[2] = 0;
737 out0[3] = 0;
738 out1[0] = 0;
739 out1[1] = 0;
740 out1[2] = 0;
741 out1[3] = 0;
742 break;
743 case 30: out0[0] = amd_bytealign ( 0, in1[3], 2);
744 out0[1] = 0;
745 out0[2] = 0;
746 out0[3] = 0;
747 out1[0] = 0;
748 out1[1] = 0;
749 out1[2] = 0;
750 out1[3] = 0;
751 break;
752 case 31: out0[0] = amd_bytealign ( 0, in1[3], 3);
753 out0[1] = 0;
754 out0[2] = 0;
755 out0[3] = 0;
756 out1[0] = 0;
757 out1[1] = 0;
758 out1[2] = 0;
759 out1[3] = 0;
760 break;
761 }
762 #endif
763 }
764
765 static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
766 {
767 #ifdef IS_NV
768 switch (num)
769 {
770 case 0: out1[3] = in1[3];
771 out1[2] = in1[2];
772 out1[1] = in1[1];
773 out1[0] = in1[0];
774 out0[3] = in0[3];
775 out0[2] = in0[2];
776 out0[1] = in0[1];
777 out0[0] = in0[0];
778 break;
779 case 1: out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
780 out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
781 out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
782 out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
783 out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
784 out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
785 out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
786 out0[0] = __byte_perm ( 0, in0[0], 0x6543);
787 break;
788 case 2: out1[3] = __byte_perm (in1[2], in1[3], 0x5432);
789 out1[2] = __byte_perm (in1[1], in1[2], 0x5432);
790 out1[1] = __byte_perm (in1[0], in1[1], 0x5432);
791 out1[0] = __byte_perm (in0[3], in1[0], 0x5432);
792 out0[3] = __byte_perm (in0[2], in0[3], 0x5432);
793 out0[2] = __byte_perm (in0[1], in0[2], 0x5432);
794 out0[1] = __byte_perm (in0[0], in0[1], 0x5432);
795 out0[0] = __byte_perm ( 0, in0[0], 0x5432);
796 break;
797 case 3: out1[3] = __byte_perm (in1[2], in1[3], 0x4321);
798 out1[2] = __byte_perm (in1[1], in1[2], 0x4321);
799 out1[1] = __byte_perm (in1[0], in1[1], 0x4321);
800 out1[0] = __byte_perm (in0[3], in1[0], 0x4321);
801 out0[3] = __byte_perm (in0[2], in0[3], 0x4321);
802 out0[2] = __byte_perm (in0[1], in0[2], 0x4321);
803 out0[1] = __byte_perm (in0[0], in0[1], 0x4321);
804 out0[0] = __byte_perm ( 0, in0[0], 0x4321);
805 break;
806 case 4: out1[3] = in1[2];
807 out1[2] = in1[1];
808 out1[1] = in1[0];
809 out1[0] = in0[3];
810 out0[3] = in0[2];
811 out0[2] = in0[1];
812 out0[1] = in0[0];
813 out0[0] = 0;
814 break;
815 case 5: out1[3] = __byte_perm (in1[1], in1[2], 0x6543);
816 out1[2] = __byte_perm (in1[0], in1[1], 0x6543);
817 out1[1] = __byte_perm (in0[3], in1[0], 0x6543);
818 out1[0] = __byte_perm (in0[2], in0[3], 0x6543);
819 out0[3] = __byte_perm (in0[1], in0[2], 0x6543);
820 out0[2] = __byte_perm (in0[0], in0[1], 0x6543);
821 out0[1] = __byte_perm ( 0, in0[0], 0x6543);
822 out0[0] = 0;
823 break;
824 case 6: out1[3] = __byte_perm (in1[1], in1[2], 0x5432);
825 out1[2] = __byte_perm (in1[0], in1[1], 0x5432);
826 out1[1] = __byte_perm (in0[3], in1[0], 0x5432);
827 out1[0] = __byte_perm (in0[2], in0[3], 0x5432);
828 out0[3] = __byte_perm (in0[1], in0[2], 0x5432);
829 out0[2] = __byte_perm (in0[0], in0[1], 0x5432);
830 out0[1] = __byte_perm ( 0, in0[0], 0x5432);
831 out0[0] = 0;
832 break;
833 case 7: out1[3] = __byte_perm (in1[1], in1[2], 0x4321);
834 out1[2] = __byte_perm (in1[0], in1[1], 0x4321);
835 out1[1] = __byte_perm (in0[3], in1[0], 0x4321);
836 out1[0] = __byte_perm (in0[2], in0[3], 0x4321);
837 out0[3] = __byte_perm (in0[1], in0[2], 0x4321);
838 out0[2] = __byte_perm (in0[0], in0[1], 0x4321);
839 out0[1] = __byte_perm ( 0, in0[0], 0x4321);
840 out0[0] = 0;
841 break;
842 case 8: out1[3] = in1[1];
843 out1[2] = in1[0];
844 out1[1] = in0[3];
845 out1[0] = in0[2];
846 out0[3] = in0[1];
847 out0[2] = in0[0];
848 out0[1] = 0;
849 out0[0] = 0;
850 break;
851 case 9: out1[3] = __byte_perm (in1[0], in1[1], 0x6543);
852 out1[2] = __byte_perm (in0[3], in1[0], 0x6543);
853 out1[1] = __byte_perm (in0[2], in0[3], 0x6543);
854 out1[0] = __byte_perm (in0[1], in0[2], 0x6543);
855 out0[3] = __byte_perm (in0[0], in0[1], 0x6543);
856 out0[2] = __byte_perm ( 0, in0[0], 0x6543);
857 out0[1] = 0;
858 out0[0] = 0;
859 break;
860 case 10: out1[3] = __byte_perm (in1[0], in1[1], 0x5432);
861 out1[2] = __byte_perm (in0[3], in1[0], 0x5432);
862 out1[1] = __byte_perm (in0[2], in0[3], 0x5432);
863 out1[0] = __byte_perm (in0[1], in0[2], 0x5432);
864 out0[3] = __byte_perm (in0[0], in0[1], 0x5432);
865 out0[2] = __byte_perm ( 0, in0[0], 0x5432);
866 out0[1] = 0;
867 out0[0] = 0;
868 break;
869 case 11: out1[3] = __byte_perm (in1[0], in1[1], 0x4321);
870 out1[2] = __byte_perm (in0[3], in1[0], 0x4321);
871 out1[1] = __byte_perm (in0[2], in0[3], 0x4321);
872 out1[0] = __byte_perm (in0[1], in0[2], 0x4321);
873 out0[3] = __byte_perm (in0[0], in0[1], 0x4321);
874 out0[2] = __byte_perm ( 0, in0[0], 0x4321);
875 out0[1] = 0;
876 out0[0] = 0;
877 break;
878 case 12: out1[3] = in1[0];
879 out1[2] = in0[3];
880 out1[1] = in0[2];
881 out1[0] = in0[1];
882 out0[3] = in0[0];
883 out0[2] = 0;
884 out0[1] = 0;
885 out0[0] = 0;
886 break;
887 case 13: out1[3] = __byte_perm (in0[3], in1[0], 0x6543);
888 out1[2] = __byte_perm (in0[2], in0[3], 0x6543);
889 out1[1] = __byte_perm (in0[1], in0[2], 0x6543);
890 out1[0] = __byte_perm (in0[0], in0[1], 0x6543);
891 out0[3] = __byte_perm ( 0, in0[0], 0x6543);
892 out0[2] = 0;
893 out0[1] = 0;
894 out0[0] = 0;
895 break;
896 case 14: out1[3] = __byte_perm (in0[3], in1[0], 0x5432);
897 out1[2] = __byte_perm (in0[2], in0[3], 0x5432);
898 out1[1] = __byte_perm (in0[1], in0[2], 0x5432);
899 out1[0] = __byte_perm (in0[0], in0[1], 0x5432);
900 out0[3] = __byte_perm ( 0, in0[0], 0x5432);
901 out0[2] = 0;
902 out0[1] = 0;
903 out0[0] = 0;
904 break;
905 case 15: out1[3] = __byte_perm (in0[3], in1[0], 0x4321);
906 out1[2] = __byte_perm (in0[2], in0[3], 0x4321);
907 out1[1] = __byte_perm (in0[1], in0[2], 0x4321);
908 out1[0] = __byte_perm (in0[0], in0[1], 0x4321);
909 out0[3] = __byte_perm ( 0, in0[0], 0x4321);
910 out0[2] = 0;
911 out0[1] = 0;
912 out0[0] = 0;
913 break;
914 case 16: out1[3] = in0[3];
915 out1[2] = in0[2];
916 out1[1] = in0[1];
917 out1[0] = in0[0];
918 out0[3] = 0;
919 out0[2] = 0;
920 out0[1] = 0;
921 out0[0] = 0;
922 break;
923 case 17: out1[3] = __byte_perm (in0[2], in0[3], 0x6543);
924 out1[2] = __byte_perm (in0[1], in0[2], 0x6543);
925 out1[1] = __byte_perm (in0[0], in0[1], 0x6543);
926 out1[0] = __byte_perm ( 0, in0[0], 0x6543);
927 out0[3] = 0;
928 out0[2] = 0;
929 out0[1] = 0;
930 out0[0] = 0;
931 break;
932 case 18: out1[3] = __byte_perm (in0[2], in0[3], 0x5432);
933 out1[2] = __byte_perm (in0[1], in0[2], 0x5432);
934 out1[1] = __byte_perm (in0[0], in0[1], 0x5432);
935 out1[0] = __byte_perm ( 0, in0[0], 0x5432);
936 out0[3] = 0;
937 out0[2] = 0;
938 out0[1] = 0;
939 out0[0] = 0;
940 break;
941 case 19: out1[3] = __byte_perm (in0[2], in0[3], 0x4321);
942 out1[2] = __byte_perm (in0[1], in0[2], 0x4321);
943 out1[1] = __byte_perm (in0[0], in0[1], 0x4321);
944 out1[0] = __byte_perm ( 0, in0[0], 0x4321);
945 out0[3] = 0;
946 out0[2] = 0;
947 out0[1] = 0;
948 out0[0] = 0;
949 break;
950 case 20: out1[3] = in0[2];
951 out1[2] = in0[1];
952 out1[1] = in0[0];
953 out1[0] = 0;
954 out0[3] = 0;
955 out0[2] = 0;
956 out0[1] = 0;
957 out0[0] = 0;
958 break;
959 case 21: out1[3] = __byte_perm (in0[1], in0[2], 0x6543);
960 out1[2] = __byte_perm (in0[0], in0[1], 0x6543);
961 out1[1] = __byte_perm ( 0, in0[0], 0x6543);
962 out1[0] = 0;
963 out0[3] = 0;
964 out0[2] = 0;
965 out0[1] = 0;
966 out0[0] = 0;
967 break;
968 case 22: out1[3] = __byte_perm (in0[1], in0[2], 0x5432);
969 out1[2] = __byte_perm (in0[0], in0[1], 0x5432);
970 out1[1] = __byte_perm ( 0, in0[0], 0x5432);
971 out1[0] = 0;
972 out0[3] = 0;
973 out0[2] = 0;
974 out0[1] = 0;
975 out0[0] = 0;
976 break;
977 case 23: out1[3] = __byte_perm (in0[1], in0[2], 0x4321);
978 out1[2] = __byte_perm (in0[0], in0[1], 0x4321);
979 out1[1] = __byte_perm ( 0, in0[0], 0x4321);
980 out1[0] = 0;
981 out0[3] = 0;
982 out0[2] = 0;
983 out0[1] = 0;
984 out0[0] = 0;
985 break;
986 case 24: out1[3] = in0[1];
987 out1[2] = in0[0];
988 out1[1] = 0;
989 out1[0] = 0;
990 out0[3] = 0;
991 out0[2] = 0;
992 out0[1] = 0;
993 out0[0] = 0;
994 break;
995 case 25: out1[3] = __byte_perm (in0[0], in0[1], 0x6543);
996 out1[2] = __byte_perm ( 0, in0[0], 0x6543);
997 out1[1] = 0;
998 out1[0] = 0;
999 out0[3] = 0;
1000 out0[2] = 0;
1001 out0[1] = 0;
1002 out0[0] = 0;
1003 break;
1004 case 26: out1[3] = __byte_perm (in0[0], in0[1], 0x5432);
1005 out1[2] = __byte_perm ( 0, in0[0], 0x5432);
1006 out1[1] = 0;
1007 out1[0] = 0;
1008 out0[3] = 0;
1009 out0[2] = 0;
1010 out0[1] = 0;
1011 out0[0] = 0;
1012 break;
1013 case 27: out1[3] = __byte_perm (in0[0], in0[1], 0x4321);
1014 out1[2] = __byte_perm ( 0, in0[0], 0x4321);
1015 out1[1] = 0;
1016 out1[0] = 0;
1017 out0[3] = 0;
1018 out0[2] = 0;
1019 out0[1] = 0;
1020 out0[0] = 0;
1021 break;
1022 case 28: out1[3] = in0[0];
1023 out1[2] = 0;
1024 out1[1] = 0;
1025 out1[0] = 0;
1026 out0[3] = 0;
1027 out0[2] = 0;
1028 out0[1] = 0;
1029 out0[0] = 0;
1030 break;
1031 case 29: out1[3] = __byte_perm ( 0, in0[0], 0x6543);
1032 out1[2] = 0;
1033 out1[1] = 0;
1034 out1[0] = 0;
1035 out0[3] = 0;
1036 out0[2] = 0;
1037 out0[1] = 0;
1038 out0[0] = 0;
1039 break;
1040 case 30: out1[3] = __byte_perm ( 0, in0[0], 0x5432);
1041 out1[2] = 0;
1042 out1[1] = 0;
1043 out1[0] = 0;
1044 out0[3] = 0;
1045 out0[2] = 0;
1046 out0[1] = 0;
1047 out0[0] = 0;
1048 break;
1049 case 31: out1[3] = __byte_perm ( 0, in0[0], 0x4321);
1050 out1[2] = 0;
1051 out1[1] = 0;
1052 out1[0] = 0;
1053 out0[3] = 0;
1054 out0[2] = 0;
1055 out0[1] = 0;
1056 out0[0] = 0;
1057 break;
1058 }
1059 #endif
1060
1061 #if defined IS_AMD || defined IS_GENERIC
1062 switch (num)
1063 {
1064 case 0: out1[3] = in1[3];
1065 out1[2] = in1[2];
1066 out1[1] = in1[1];
1067 out1[0] = in1[0];
1068 out0[3] = in0[3];
1069 out0[2] = in0[2];
1070 out0[1] = in0[1];
1071 out0[0] = in0[0];
1072 break;
1073 case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3);
1074 out1[2] = amd_bytealign (in1[2], in1[1], 3);
1075 out1[1] = amd_bytealign (in1[1], in1[0], 3);
1076 out1[0] = amd_bytealign (in1[0], in0[3], 3);
1077 out0[3] = amd_bytealign (in0[3], in0[2], 3);
1078 out0[2] = amd_bytealign (in0[2], in0[1], 3);
1079 out0[1] = amd_bytealign (in0[1], in0[0], 3);
1080 out0[0] = amd_bytealign (in0[0], 0, 3);
1081 break;
1082 case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2);
1083 out1[2] = amd_bytealign (in1[2], in1[1], 2);
1084 out1[1] = amd_bytealign (in1[1], in1[0], 2);
1085 out1[0] = amd_bytealign (in1[0], in0[3], 2);
1086 out0[3] = amd_bytealign (in0[3], in0[2], 2);
1087 out0[2] = amd_bytealign (in0[2], in0[1], 2);
1088 out0[1] = amd_bytealign (in0[1], in0[0], 2);
1089 out0[0] = amd_bytealign (in0[0], 0, 2);
1090 break;
1091 case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1);
1092 out1[2] = amd_bytealign (in1[2], in1[1], 1);
1093 out1[1] = amd_bytealign (in1[1], in1[0], 1);
1094 out1[0] = amd_bytealign (in1[0], in0[3], 1);
1095 out0[3] = amd_bytealign (in0[3], in0[2], 1);
1096 out0[2] = amd_bytealign (in0[2], in0[1], 1);
1097 out0[1] = amd_bytealign (in0[1], in0[0], 1);
1098 out0[0] = amd_bytealign (in0[0], 0, 1);
1099 break;
1100 case 4: out1[3] = in1[2];
1101 out1[2] = in1[1];
1102 out1[1] = in1[0];
1103 out1[0] = in0[3];
1104 out0[3] = in0[2];
1105 out0[2] = in0[1];
1106 out0[1] = in0[0];
1107 out0[0] = 0;
1108 break;
1109 case 5: out1[3] = amd_bytealign (in1[2], in1[1], 3);
1110 out1[2] = amd_bytealign (in1[1], in1[0], 3);
1111 out1[1] = amd_bytealign (in1[0], in0[3], 3);
1112 out1[0] = amd_bytealign (in0[3], in0[2], 3);
1113 out0[3] = amd_bytealign (in0[2], in0[1], 3);
1114 out0[2] = amd_bytealign (in0[1], in0[0], 3);
1115 out0[1] = amd_bytealign (in0[0], 0, 3);
1116 out0[0] = 0;
1117 break;
1118 case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2);
1119 out1[2] = amd_bytealign (in1[1], in1[0], 2);
1120 out1[1] = amd_bytealign (in1[0], in0[3], 2);
1121 out1[0] = amd_bytealign (in0[3], in0[2], 2);
1122 out0[3] = amd_bytealign (in0[2], in0[1], 2);
1123 out0[2] = amd_bytealign (in0[1], in0[0], 2);
1124 out0[1] = amd_bytealign (in0[0], 0, 2);
1125 out0[0] = 0;
1126 break;
1127 case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1);
1128 out1[2] = amd_bytealign (in1[1], in1[0], 1);
1129 out1[1] = amd_bytealign (in1[0], in0[3], 1);
1130 out1[0] = amd_bytealign (in0[3], in0[2], 1);
1131 out0[3] = amd_bytealign (in0[2], in0[1], 1);
1132 out0[2] = amd_bytealign (in0[1], in0[0], 1);
1133 out0[1] = amd_bytealign (in0[0], 0, 1);
1134 out0[0] = 0;
1135 break;
1136 case 8: out1[3] = in1[1];
1137 out1[2] = in1[0];
1138 out1[1] = in0[3];
1139 out1[0] = in0[2];
1140 out0[3] = in0[1];
1141 out0[2] = in0[0];
1142 out0[1] = 0;
1143 out0[0] = 0;
1144 break;
1145 case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3);
1146 out1[2] = amd_bytealign (in1[0], in0[3], 3);
1147 out1[1] = amd_bytealign (in0[3], in0[2], 3);
1148 out1[0] = amd_bytealign (in0[2], in0[1], 3);
1149 out0[3] = amd_bytealign (in0[1], in0[0], 3);
1150 out0[2] = amd_bytealign (in0[0], 0, 3);
1151 out0[1] = 0;
1152 out0[0] = 0;
1153 break;
1154 case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2);
1155 out1[2] = amd_bytealign (in1[0], in0[3], 2);
1156 out1[1] = amd_bytealign (in0[3], in0[2], 2);
1157 out1[0] = amd_bytealign (in0[2], in0[1], 2);
1158 out0[3] = amd_bytealign (in0[1], in0[0], 2);
1159 out0[2] = amd_bytealign (in0[0], 0, 2);
1160 out0[1] = 0;
1161 out0[0] = 0;
1162 break;
1163 case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1);
1164 out1[2] = amd_bytealign (in1[0], in0[3], 1);
1165 out1[1] = amd_bytealign (in0[3], in0[2], 1);
1166 out1[0] = amd_bytealign (in0[2], in0[1], 1);
1167 out0[3] = amd_bytealign (in0[1], in0[0], 1);
1168 out0[2] = amd_bytealign (in0[0], 0, 1);
1169 out0[1] = 0;
1170 out0[0] = 0;
1171 break;
1172 case 12: out1[3] = in1[0];
1173 out1[2] = in0[3];
1174 out1[1] = in0[2];
1175 out1[0] = in0[1];
1176 out0[3] = in0[0];
1177 out0[2] = 0;
1178 out0[1] = 0;
1179 out0[0] = 0;
1180 break;
1181 case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3);
1182 out1[2] = amd_bytealign (in0[3], in0[2], 3);
1183 out1[1] = amd_bytealign (in0[2], in0[1], 3);
1184 out1[0] = amd_bytealign (in0[1], in0[0], 3);
1185 out0[3] = amd_bytealign (in0[0], 0, 3);
1186 out0[2] = 0;
1187 out0[1] = 0;
1188 out0[0] = 0;
1189 break;
1190 case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2);
1191 out1[2] = amd_bytealign (in0[3], in0[2], 2);
1192 out1[1] = amd_bytealign (in0[2], in0[1], 2);
1193 out1[0] = amd_bytealign (in0[1], in0[0], 2);
1194 out0[3] = amd_bytealign (in0[0], 0, 2);
1195 out0[2] = 0;
1196 out0[1] = 0;
1197 out0[0] = 0;
1198 break;
1199 case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1);
1200 out1[2] = amd_bytealign (in0[3], in0[2], 1);
1201 out1[1] = amd_bytealign (in0[2], in0[1], 1);
1202 out1[0] = amd_bytealign (in0[1], in0[0], 1);
1203 out0[3] = amd_bytealign (in0[0], 0, 1);
1204 out0[2] = 0;
1205 out0[1] = 0;
1206 out0[0] = 0;
1207 break;
1208 case 16: out1[3] = in0[3];
1209 out1[2] = in0[2];
1210 out1[1] = in0[1];
1211 out1[0] = in0[0];
1212 out0[3] = 0;
1213 out0[2] = 0;
1214 out0[1] = 0;
1215 out0[0] = 0;
1216 break;
1217 case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3);
1218 out1[2] = amd_bytealign (in0[2], in0[1], 3);
1219 out1[1] = amd_bytealign (in0[1], in0[0], 3);
1220 out1[0] = amd_bytealign (in0[0], 0, 3);
1221 out0[3] = 0;
1222 out0[2] = 0;
1223 out0[1] = 0;
1224 out0[0] = 0;
1225 break;
1226 case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2);
1227 out1[2] = amd_bytealign (in0[2], in0[1], 2);
1228 out1[1] = amd_bytealign (in0[1], in0[0], 2);
1229 out1[0] = amd_bytealign (in0[0], 0, 2);
1230 out0[3] = 0;
1231 out0[2] = 0;
1232 out0[1] = 0;
1233 out0[0] = 0;
1234 break;
1235 case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1);
1236 out1[2] = amd_bytealign (in0[2], in0[1], 1);
1237 out1[1] = amd_bytealign (in0[1], in0[0], 1);
1238 out1[0] = amd_bytealign (in0[0], 0, 1);
1239 out0[3] = 0;
1240 out0[2] = 0;
1241 out0[1] = 0;
1242 out0[0] = 0;
1243 break;
1244 case 20: out1[3] = in0[2];
1245 out1[2] = in0[1];
1246 out1[1] = in0[0];
1247 out1[0] = 0;
1248 out0[3] = 0;
1249 out0[2] = 0;
1250 out0[1] = 0;
1251 out0[0] = 0;
1252 break;
1253 case 21: out1[3] = amd_bytealign (in0[2], in0[1], 3);
1254 out1[2] = amd_bytealign (in0[1], in0[0], 3);
1255 out1[1] = amd_bytealign (in0[0], 0, 3);
1256 out1[0] = 0;
1257 out0[3] = 0;
1258 out0[2] = 0;
1259 out0[1] = 0;
1260 out0[0] = 0;
1261 break;
1262 case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2);
1263 out1[2] = amd_bytealign (in0[1], in0[0], 2);
1264 out1[1] = amd_bytealign (in0[0], 0, 2);
1265 out1[0] = 0;
1266 out0[3] = 0;
1267 out0[2] = 0;
1268 out0[1] = 0;
1269 out0[0] = 0;
1270 break;
1271 case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1);
1272 out1[2] = amd_bytealign (in0[1], in0[0], 1);
1273 out1[1] = amd_bytealign (in0[0], 0, 1);
1274 out1[0] = 0;
1275 out0[3] = 0;
1276 out0[2] = 0;
1277 out0[1] = 0;
1278 out0[0] = 0;
1279 break;
1280 case 24: out1[3] = in0[1];
1281 out1[2] = in0[0];
1282 out1[1] = 0;
1283 out1[0] = 0;
1284 out0[3] = 0;
1285 out0[2] = 0;
1286 out0[1] = 0;
1287 out0[0] = 0;
1288 break;
1289 case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3);
1290 out1[2] = amd_bytealign (in0[0], 0, 3);
1291 out1[1] = 0;
1292 out1[0] = 0;
1293 out0[3] = 0;
1294 out0[2] = 0;
1295 out0[1] = 0;
1296 out0[0] = 0;
1297 break;
1298 case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2);
1299 out1[2] = amd_bytealign (in0[0], 0, 2);
1300 out1[1] = 0;
1301 out1[0] = 0;
1302 out0[3] = 0;
1303 out0[2] = 0;
1304 out0[1] = 0;
1305 out0[0] = 0;
1306 break;
1307 case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1);
1308 out1[2] = amd_bytealign (in0[0], 0, 1);
1309 out1[1] = 0;
1310 out1[0] = 0;
1311 out0[3] = 0;
1312 out0[2] = 0;
1313 out0[1] = 0;
1314 out0[0] = 0;
1315 break;
1316 case 28: out1[3] = in0[0];
1317 out1[2] = 0;
1318 out1[1] = 0;
1319 out1[0] = 0;
1320 out0[3] = 0;
1321 out0[2] = 0;
1322 out0[1] = 0;
1323 out0[0] = 0;
1324 break;
1325 case 29: out1[3] = amd_bytealign (in0[0], 0, 3);
1326 out1[2] = 0;
1327 out1[1] = 0;
1328 out1[0] = 0;
1329 out0[3] = 0;
1330 out0[2] = 0;
1331 out0[1] = 0;
1332 out0[0] = 0;
1333 break;
1334 case 30: out1[3] = amd_bytealign (in0[0], 0, 2);
1335 out1[2] = 0;
1336 out1[1] = 0;
1337 out1[0] = 0;
1338 out0[3] = 0;
1339 out0[2] = 0;
1340 out0[1] = 0;
1341 out0[0] = 0;
1342 break;
1343 case 31: out1[3] = amd_bytealign (in0[0], 0, 1);
1344 out1[2] = 0;
1345 out1[1] = 0;
1346 out1[0] = 0;
1347 out0[3] = 0;
1348 out0[2] = 0;
1349 out0[1] = 0;
1350 out0[0] = 0;
1351 break;
1352 }
1353 #endif
1354 }
1355
1356 static void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
1357 {
1358 u32 tmp[2];
1359
1360 switch (offset & 3)
1361 {
1362 case 0: tmp[0] = src_r0;
1363 tmp[1] = 0;
1364 break;
1365 case 1: tmp[0] = src_r0 << 8;
1366 tmp[1] = src_r0 >> 24;
1367 break;
1368 case 2: tmp[0] = src_r0 << 16;
1369 tmp[1] = src_r0 >> 16;
1370 break;
1371 case 3: tmp[0] = src_r0 << 24;
1372 tmp[1] = src_r0 >> 8;
1373 break;
1374 }
1375
1376 switch (offset / 4)
1377 {
1378 case 0: dst0[0] |= tmp[0];
1379 dst0[1] = tmp[1];
1380 break;
1381 case 1: dst0[1] |= tmp[0];
1382 dst0[2] = tmp[1];
1383 break;
1384 case 2: dst0[2] |= tmp[0];
1385 dst0[3] = tmp[1];
1386 break;
1387 case 3: dst0[3] |= tmp[0];
1388 dst1[0] = tmp[1];
1389 break;
1390 case 4: dst1[0] |= tmp[0];
1391 dst1[1] = tmp[1];
1392 break;
1393 case 5: dst1[1] |= tmp[0];
1394 dst1[2] = tmp[1];
1395 break;
1396 case 6: dst1[2] |= tmp[0];
1397 dst1[3] = tmp[1];
1398 break;
1399 case 7: dst1[3] |= tmp[0];
1400 break;
1401 }
1402 }
1403
1404 static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
1405 {
1406 #ifdef IS_NV
1407 switch (offset)
1408 {
1409 case 0:
1410 dst0[0] = src_r0[0];
1411 dst0[1] = src_r0[1];
1412 dst0[2] = src_r0[2];
1413 dst0[3] = src_r0[3];
1414 dst1[0] = src_r1[0];
1415 dst1[1] = src_r1[1];
1416 dst1[2] = src_r1[2];
1417 dst1[3] = src_r1[3];
1418 break;
1419
1420 case 1:
1421 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x6540);
1422 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1423 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1424 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1425 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1426 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1427 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
1428 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x6543);
1429 break;
1430
1431 case 2:
1432 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x5410);
1433 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1434 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1435 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1436 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1437 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1438 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
1439 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x5432);
1440 break;
1441
1442 case 3:
1443 dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x4210);
1444 dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1445 dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1446 dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1447 dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1448 dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1449 dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
1450 dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x4321);
1451 break;
1452
1453 case 4:
1454 dst0[1] = src_r0[0];
1455 dst0[2] = src_r0[1];
1456 dst0[3] = src_r0[2];
1457 dst1[0] = src_r0[3];
1458 dst1[1] = src_r1[0];
1459 dst1[2] = src_r1[1];
1460 dst1[3] = src_r1[2];
1461 break;
1462
1463 case 5:
1464 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x6540);
1465 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1466 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1467 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1468 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1469 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1470 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
1471 break;
1472
1473 case 6:
1474 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x5410);
1475 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1476 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1477 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1478 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1479 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1480 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
1481 break;
1482
1483 case 7:
1484 dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x4210);
1485 dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1486 dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1487 dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1488 dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1489 dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1490 dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
1491 break;
1492
1493 case 8:
1494 dst0[2] = src_r0[0];
1495 dst0[3] = src_r0[1];
1496 dst1[0] = src_r0[2];
1497 dst1[1] = src_r0[3];
1498 dst1[2] = src_r1[0];
1499 dst1[3] = src_r1[1];
1500 break;
1501
1502 case 9:
1503 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x6540);
1504 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1505 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1506 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1507 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1508 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
1509 break;
1510
1511 case 10:
1512 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x5410);
1513 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1514 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1515 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1516 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1517 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
1518 break;
1519
1520 case 11:
1521 dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x4210);
1522 dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1523 dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1524 dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1525 dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1526 dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
1527 break;
1528
1529 case 12:
1530 dst0[3] = src_r0[0];
1531 dst1[0] = src_r0[1];
1532 dst1[1] = src_r0[2];
1533 dst1[2] = src_r0[3];
1534 dst1[3] = src_r1[0];
1535 break;
1536
1537 case 13:
1538 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x6540);
1539 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1540 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1541 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1542 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
1543 break;
1544
1545 case 14:
1546 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x5410);
1547 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1548 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1549 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1550 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
1551 break;
1552
1553 case 15:
1554 dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x4210);
1555 dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1556 dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1557 dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1558 dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
1559 break;
1560
1561 case 16:
1562 dst1[0] = src_r0[0];
1563 dst1[1] = src_r0[1];
1564 dst1[2] = src_r0[2];
1565 dst1[3] = src_r0[3];
1566 break;
1567
1568 case 17:
1569 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x6540);
1570 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1571 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1572 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
1573 break;
1574
1575 case 18:
1576 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x5410);
1577 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1578 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1579 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
1580 break;
1581
1582 case 19:
1583 dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x4210);
1584 dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1585 dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1586 dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
1587 break;
1588
1589 case 20:
1590 dst1[1] = src_r0[0];
1591 dst1[2] = src_r0[1];
1592 dst1[3] = src_r0[2];
1593 break;
1594
1595 case 21:
1596 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x6540);
1597 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1598 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
1599 break;
1600
1601 case 22:
1602 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x5410);
1603 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1604 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
1605 break;
1606
1607 case 23:
1608 dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x4210);
1609 dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1610 dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
1611 break;
1612
1613 case 24:
1614 dst1[2] = src_r0[0];
1615 dst1[3] = src_r0[1];
1616 break;
1617
1618 case 25:
1619 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x6540);
1620 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
1621 break;
1622
1623 case 26:
1624 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x5410);
1625 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
1626 break;
1627
1628 case 27:
1629 dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x4210);
1630 dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
1631 break;
1632
1633 case 28:
1634 dst1[3] = src_r0[0];
1635 break;
1636
1637 case 29:
1638 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x6540);
1639 break;
1640
1641 case 30:
1642 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x5410);
1643 break;
1644
1645 case 31:
1646 dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x4210);
1647 break;
1648 }
1649 #endif
1650
1651 #if defined IS_AMD || defined IS_GENERIC
1652 switch (offset)
1653 {
1654 case 31:
1655 dst1[3] = src_l1[3] | src_r0[0] << 24;
1656 break;
1657 case 30:
1658 dst1[3] = src_l1[3] | src_r0[0] << 16;
1659 break;
1660 case 29:
1661 dst1[3] = src_l1[3] | src_r0[0] << 8;
1662 break;
1663 case 28:
1664 dst1[3] = src_r0[0];
1665 break;
1666 case 27:
1667 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
1668 dst1[2] = src_l1[2] | src_r0[0] << 24;
1669 break;
1670 case 26:
1671 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
1672 dst1[2] = src_l1[2] | src_r0[0] << 16;
1673 break;
1674 case 25:
1675 dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
1676 dst1[2] = src_l1[2] | src_r0[0] << 8;
1677 break;
1678 case 24:
1679 dst1[3] = src_r0[1];
1680 dst1[2] = src_r0[0];
1681 break;
1682 case 23:
1683 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
1684 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
1685 dst1[1] = src_l1[1] | src_r0[0] << 24;
1686 break;
1687 case 22:
1688 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
1689 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
1690 dst1[1] = src_l1[1] | src_r0[0] << 16;
1691 break;
1692 case 21:
1693 dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
1694 dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
1695 dst1[1] = src_l1[1] | src_r0[0] << 8;
1696 break;
1697 case 20:
1698 dst1[3] = src_r0[2];
1699 dst1[2] = src_r0[1];
1700 dst1[1] = src_r0[0];
1701 break;
1702 case 19:
1703 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
1704 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
1705 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
1706 dst1[0] = src_l1[0] | src_r0[0] << 24;
1707 break;
1708 case 18:
1709 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
1710 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
1711 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
1712 dst1[0] = src_l1[0] | src_r0[0] << 16;
1713 break;
1714 case 17:
1715 dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
1716 dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
1717 dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
1718 dst1[0] = src_l1[0] | src_r0[0] << 8;
1719 break;
1720 case 16:
1721 dst1[3] = src_r0[3];
1722 dst1[2] = src_r0[2];
1723 dst1[1] = src_r0[1];
1724 dst1[0] = src_r0[0];
1725 break;
1726 case 15:
1727 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1);
1728 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1);
1729 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1);
1730 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1);
1731 dst0[3] = src_l0[3] | src_r0[0] << 24;
1732 break;
1733 case 14:
1734 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2);
1735 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2);
1736 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 2);
1737 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2);
1738 dst0[3] = src_l0[3] | src_r0[0] << 16;
1739 break;
1740 case 13:
1741 dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3);
1742 dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3);
1743 dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3);
1744 dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3);
1745 dst0[3] = src_l0[3] | src_r0[0] << 8;
1746 break;
1747 case 12:
1748 dst1[3] = src_r1[0];
1749 dst1[2] = src_r0[3];
1750 dst1[1] = src_r0[2];
1751 dst1[0] = src_r0[1];
1752 dst0[3] = src_r0[0];
1753 break;
1754 case 11:
1755 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1);
1756 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1);
1757 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1);
1758 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1);
1759 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
1760 dst0[2] = src_l0[2] | src_r0[0] << 24;
1761 break;
1762 case 10:
1763 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2);
1764 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2);
1765 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2);
1766 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2);
1767 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
1768 dst0[2] = src_l0[2] | src_r0[0] << 16;
1769 break;
1770 case 9:
1771 dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3);
1772 dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3);
1773 dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3);
1774 dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3);
1775 dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
1776 dst0[2] = src_l0[2] | src_r0[0] << 8;
1777 break;
1778 case 8:
1779 dst1[3] = src_r1[1];
1780 dst1[2] = src_r1[0];
1781 dst1[1] = src_r0[3];
1782 dst1[0] = src_r0[2];
1783 dst0[3] = src_r0[1];
1784 dst0[2] = src_r0[0];
1785 break;
1786 case 7:
1787 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1);
1788 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1);
1789 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1);
1790 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1);
1791 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
1792 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
1793 dst0[1] = src_l0[1] | src_r0[0] << 24;
1794 break;
1795 case 6:
1796 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2);
1797 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2);
1798 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2);
1799 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2);
1800 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
1801 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
1802 dst0[1] = src_l0[1] | src_r0[0] << 16;
1803 break;
1804 case 5:
1805 dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3);
1806 dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3);
1807 dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3);
1808 dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3);
1809 dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
1810 dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
1811 dst0[1] = src_l0[1] | src_r0[0] << 8;
1812 break;
1813 case 4:
1814 dst1[3] = src_r1[2];
1815 dst1[2] = src_r1[1];
1816 dst1[1] = src_r1[0];
1817 dst1[0] = src_r0[3];
1818 dst0[3] = src_r0[2];
1819 dst0[2] = src_r0[1];
1820 dst0[1] = src_r0[0];
1821 break;
1822 case 3:
1823 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1);
1824 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1);
1825 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1);
1826 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1);
1827 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
1828 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
1829 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
1830 dst0[0] = src_l0[0] | src_r0[0] << 24;
1831 break;
1832 case 2:
1833 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2);
1834 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2);
1835 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2);
1836 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2);
1837 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
1838 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
1839 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
1840 dst0[0] = src_l0[0] | src_r0[0] << 16;
1841 break;
1842 case 1:
1843 dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3);
1844 dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3);
1845 dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 3);
1846 dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3);
1847 dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
1848 dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
1849 dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
1850 dst0[0] = src_l0[0] | src_r0[0] << 8;
1851 break;
1852 case 0:
1853 dst1[3] = src_r1[3];
1854 dst1[2] = src_r1[2];
1855 dst1[1] = src_r1[1];
1856 dst1[0] = src_r1[0];
1857 dst0[3] = src_r0[3];
1858 dst0[2] = src_r0[2];
1859 dst0[1] = src_r0[1];
1860 dst0[0] = src_r0[0];
1861 break;
1862 }
1863 #endif
1864 }
1865
1866 static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
1867 {
1868 rshift_block_N (in0, in1, out0, out1, 32 - len);
1869
1870 u32 tib40[4];
1871 u32 tib41[4];
1872
1873 tib40[0] = out1[3];
1874 tib40[1] = out1[2];
1875 tib40[2] = out1[1];
1876 tib40[3] = out1[0];
1877 tib41[0] = out0[3];
1878 tib41[1] = out0[2];
1879 tib41[2] = out0[1];
1880 tib41[3] = out0[0];
1881
1882 out0[0] = swap32 (tib40[0]);
1883 out0[1] = swap32 (tib40[1]);
1884 out0[2] = swap32 (tib40[2]);
1885 out0[3] = swap32 (tib40[3]);
1886 out1[0] = swap32 (tib41[0]);
1887 out1[1] = swap32 (tib41[1]);
1888 out1[2] = swap32 (tib41[2]);
1889 out1[3] = swap32 (tib41[3]);
1890 }
1891
1892 static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1893 {
1894 buf0[0] |= (generate_cmask (buf0[0]));
1895 buf0[1] |= (generate_cmask (buf0[1]));
1896 buf0[2] |= (generate_cmask (buf0[2]));
1897 buf0[3] |= (generate_cmask (buf0[3]));
1898 buf1[0] |= (generate_cmask (buf1[0]));
1899 buf1[1] |= (generate_cmask (buf1[1]));
1900 buf1[2] |= (generate_cmask (buf1[2]));
1901 buf1[3] |= (generate_cmask (buf1[3]));
1902
1903 return in_len;
1904 }
1905
1906 static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1907 {
1908 buf0[0] &= ~(generate_cmask (buf0[0]));
1909 buf0[1] &= ~(generate_cmask (buf0[1]));
1910 buf0[2] &= ~(generate_cmask (buf0[2]));
1911 buf0[3] &= ~(generate_cmask (buf0[3]));
1912 buf1[0] &= ~(generate_cmask (buf1[0]));
1913 buf1[1] &= ~(generate_cmask (buf1[1]));
1914 buf1[2] &= ~(generate_cmask (buf1[2]));
1915 buf1[3] &= ~(generate_cmask (buf1[3]));
1916
1917 return in_len;
1918 }
1919
1920 static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1921 {
1922 rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
1923
1924 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
1925
1926 return in_len;
1927 }
1928
1929 static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1930 {
1931 rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
1932
1933 buf0[0] |= (0x00000020 & generate_cmask (buf0[0]));
1934
1935 return in_len;
1936 }
1937
1938 static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1939 {
1940 buf0[0] ^= (generate_cmask (buf0[0]));
1941 buf0[1] ^= (generate_cmask (buf0[1]));
1942 buf0[2] ^= (generate_cmask (buf0[2]));
1943 buf0[3] ^= (generate_cmask (buf0[3]));
1944 buf1[0] ^= (generate_cmask (buf1[0]));
1945 buf1[1] ^= (generate_cmask (buf1[1]));
1946 buf1[2] ^= (generate_cmask (buf1[2]));
1947 buf1[3] ^= (generate_cmask (buf1[3]));
1948
1949 return in_len;
1950 }
1951
1952 static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1953 {
1954 if (p0 >= in_len) return (in_len);
1955
1956 const u32 tmp = 0x20u << ((p0 & 3) * 8);
1957
1958 switch (p0 / 4)
1959 {
1960 case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break;
1961 case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break;
1962 case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break;
1963 case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break;
1964 case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break;
1965 case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break;
1966 case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break;
1967 case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break;
1968 }
1969
1970 return in_len;
1971 }
1972
1973 static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1974 {
1975 reverse_block (buf0, buf1, buf0, buf1, in_len);
1976
1977 return in_len;
1978 }
1979
1980 static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1981 {
1982 if ((in_len + in_len) >= 32) return (in_len);
1983
1984 u32 out_len = in_len;
1985
1986 append_block8 (out_len, buf0, buf1, buf0, buf1, buf0, buf1);
1987
1988 out_len += in_len;
1989
1990 return out_len;
1991 }
1992
1993 static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
1994 {
1995 if (((in_len * p0) + in_len) >= 32) return (in_len);
1996
1997 u32 out_len = in_len;
1998
1999 u32 tib40[4];
2000 u32 tib41[4];
2001
2002 tib40[0] = buf0[0];
2003 tib40[1] = buf0[1];
2004 tib40[2] = buf0[2];
2005 tib40[3] = buf0[3];
2006 tib41[0] = buf1[0];
2007 tib41[1] = buf1[1];
2008 tib41[2] = buf1[2];
2009 tib41[3] = buf1[3];
2010
2011 for (u32 i = 0; i < p0; i++)
2012 {
2013 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
2014
2015 out_len += in_len;
2016 }
2017
2018 return out_len;
2019 }
2020
2021 static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2022 {
2023 if ((in_len + in_len) >= 32) return (in_len);
2024
2025 u32 out_len = in_len;
2026
2027 u32 tib40[4];
2028 u32 tib41[4];
2029
2030 reverse_block (buf0, buf1, tib40, tib41, out_len);
2031
2032 append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41);
2033
2034 out_len += in_len;
2035
2036 return out_len;
2037 }
2038
2039 static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2040 {
2041 if ((in_len + 1) >= 32) return (in_len);
2042
2043 u32 out_len = in_len;
2044
2045 append_block1 (out_len, buf0, buf1, p0);
2046
2047 out_len++;
2048
2049 return out_len;
2050 }
2051
2052 static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2053 {
2054 if ((in_len + 1) >= 32) return (in_len);
2055
2056 u32 out_len = in_len;
2057
2058 rshift_block (buf0, buf1, buf0, buf1);
2059
2060 buf0[0] = buf0[0] | p0;
2061
2062 out_len++;
2063
2064 return out_len;
2065 }
2066
2067 static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2068 {
2069 if (in_len == 0) return (in_len);
2070
2071 const u32 in_len1 = in_len - 1;
2072
2073 const u32 sh = (in_len1 & 3) * 8;
2074
2075 const u32 tmp = (buf0[0] & 0xff) << sh;
2076
2077 lshift_block (buf0, buf1, buf0, buf1);
2078
2079 switch (in_len1 / 4)
2080 {
2081 case 0: buf0[0] |= tmp; break;
2082 case 1: buf0[1] |= tmp; break;
2083 case 2: buf0[2] |= tmp; break;
2084 case 3: buf0[3] |= tmp; break;
2085 case 4: buf1[0] |= tmp; break;
2086 case 5: buf1[1] |= tmp; break;
2087 case 6: buf1[2] |= tmp; break;
2088 case 7: buf1[3] |= tmp; break;
2089 }
2090
2091 return in_len;
2092 }
2093
2094 static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2095 {
2096 if (in_len == 0) return (in_len);
2097
2098 const u32 in_len1 = in_len - 1;
2099
2100 const u32 sh = (in_len1 & 3) * 8;
2101
2102 u32 tmp = 0;
2103
2104 switch (in_len1 / 4)
2105 {
2106 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2107 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2108 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2109 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2110 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2111 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2112 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2113 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2114 }
2115
2116 rshift_block (buf0, buf1, buf0, buf1);
2117
2118 buf0[0] |= tmp;
2119
2120 truncate_right (buf0, buf1, in_len);
2121
2122 return in_len;
2123 }
2124
2125 static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2126 {
2127 if (in_len == 0) return (in_len);
2128
2129 const u32 in_len1 = in_len - 1;
2130
2131 lshift_block (buf0, buf1, buf0, buf1);
2132
2133 return in_len1;
2134 }
2135
2136 static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2137 {
2138 if (in_len == 0) return (in_len);
2139
2140 const u32 in_len1 = in_len - 1;
2141
2142 const u32 tmp = (1 << ((in_len1 & 3) * 8)) - 1;
2143
2144 switch (in_len1 / 4)
2145 {
2146 case 0: buf0[0] &= tmp; break;
2147 case 1: buf0[1] &= tmp; break;
2148 case 2: buf0[2] &= tmp; break;
2149 case 3: buf0[3] &= tmp; break;
2150 case 4: buf1[0] &= tmp; break;
2151 case 5: buf1[1] &= tmp; break;
2152 case 6: buf1[2] &= tmp; break;
2153 case 7: buf1[3] &= tmp; break;
2154 }
2155
2156 return in_len1;
2157 }
2158
2159 static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2160 {
2161 if (p0 >= in_len) return (in_len);
2162
2163 u32 out_len = in_len;
2164
2165 u32 tib40[4];
2166 u32 tib41[4];
2167
2168 lshift_block (buf0, buf1, tib40, tib41);
2169
2170 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2171 const u32 mr = ~ml;
2172
2173 switch (p0 / 4)
2174 {
2175 case 0: buf0[0] = (buf0[0] & ml)
2176 | (tib40[0] & mr);
2177 buf0[1] = tib40[1];
2178 buf0[2] = tib40[2];
2179 buf0[3] = tib40[3];
2180 buf1[0] = tib41[0];
2181 buf1[1] = tib41[1];
2182 buf1[2] = tib41[2];
2183 buf1[3] = tib41[3];
2184 break;
2185 case 1: buf0[1] = (buf0[1] & ml)
2186 | (tib40[1] & mr);
2187 buf0[2] = tib40[2];
2188 buf0[3] = tib40[3];
2189 buf1[0] = tib41[0];
2190 buf1[1] = tib41[1];
2191 buf1[2] = tib41[2];
2192 buf1[3] = tib41[3];
2193 break;
2194 case 2: buf0[2] = (buf0[2] & ml)
2195 | (tib40[2] & mr);
2196 buf0[3] = tib40[3];
2197 buf1[0] = tib41[0];
2198 buf1[1] = tib41[1];
2199 buf1[2] = tib41[2];
2200 buf1[3] = tib41[3];
2201 break;
2202 case 3: buf0[3] = (buf0[3] & ml)
2203 | (tib40[3] & mr);
2204 buf1[0] = tib41[0];
2205 buf1[1] = tib41[1];
2206 buf1[2] = tib41[2];
2207 buf1[3] = tib41[3];
2208 break;
2209 case 4: buf1[0] = (buf1[0] & ml)
2210 | (tib41[0] & mr);
2211 buf1[1] = tib41[1];
2212 buf1[2] = tib41[2];
2213 buf1[3] = tib41[3];
2214 break;
2215 case 5: buf1[1] = (buf1[1] & ml)
2216 | (tib41[1] & mr);
2217 buf1[2] = tib41[2];
2218 buf1[3] = tib41[3];
2219 break;
2220 case 6: buf1[2] = (buf1[2] & ml)
2221 | (tib41[2] & mr);
2222 buf1[3] = tib41[3];
2223 break;
2224 case 7: buf1[3] = (buf1[3] & ml)
2225 | (tib41[3] & mr);
2226 break;
2227 }
2228
2229 out_len--;
2230
2231 return out_len;
2232 }
2233
2234 static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2235 {
2236 if (p0 >= in_len) return (in_len);
2237
2238 if ((p0 + p1) > in_len) return (in_len);
2239
2240 u32 out_len = p1;
2241
2242 lshift_block_N (buf0, buf1, buf0, buf1, p0);
2243
2244 truncate_right (buf0, buf1, out_len);
2245
2246 return out_len;
2247 }
2248
2249 static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2250 {
2251 if (p0 >= in_len) return (in_len);
2252
2253 if ((p0 + p1) > in_len) return (in_len);
2254
2255 u32 out_len = in_len;
2256
2257 u32 tib40[4];
2258 u32 tib41[4];
2259
2260 tib40[0] = 0;
2261 tib40[1] = 0;
2262 tib40[2] = 0;
2263 tib40[3] = 0;
2264 tib41[0] = 0;
2265 tib41[1] = 0;
2266 tib41[2] = 0;
2267 tib41[3] = 0;
2268
2269 lshift_block_N (buf0, buf1, tib40, tib41, p1);
2270
2271 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2272 const u32 mr = ~ml;
2273
2274 switch (p0 / 4)
2275 {
2276 case 0: buf0[0] = (buf0[0] & ml)
2277 | (tib40[0] & mr);
2278 buf0[1] = tib40[1];
2279 buf0[2] = tib40[2];
2280 buf0[3] = tib40[3];
2281 buf1[0] = tib41[0];
2282 buf1[1] = tib41[1];
2283 buf1[2] = tib41[2];
2284 buf1[3] = tib41[3];
2285 break;
2286 case 1: buf0[1] = (buf0[1] & ml)
2287 | (tib40[1] & mr);
2288 buf0[2] = tib40[2];
2289 buf0[3] = tib40[3];
2290 buf1[0] = tib41[0];
2291 buf1[1] = tib41[1];
2292 buf1[2] = tib41[2];
2293 buf1[3] = tib41[3];
2294 break;
2295 case 2: buf0[2] = (buf0[2] & ml)
2296 | (tib40[2] & mr);
2297 buf0[3] = tib40[3];
2298 buf1[0] = tib41[0];
2299 buf1[1] = tib41[1];
2300 buf1[2] = tib41[2];
2301 buf1[3] = tib41[3];
2302 break;
2303 case 3: buf0[3] = (buf0[3] & ml)
2304 | (tib40[3] & mr);
2305 buf1[0] = tib41[0];
2306 buf1[1] = tib41[1];
2307 buf1[2] = tib41[2];
2308 buf1[3] = tib41[3];
2309 break;
2310 case 4: buf1[0] = (buf1[0] & ml)
2311 | (tib41[0] & mr);
2312 buf1[1] = tib41[1];
2313 buf1[2] = tib41[2];
2314 buf1[3] = tib41[3];
2315 break;
2316 case 5: buf1[1] = (buf1[1] & ml)
2317 | (tib41[1] & mr);
2318 buf1[2] = tib41[2];
2319 buf1[3] = tib41[3];
2320 break;
2321 case 6: buf1[2] = (buf1[2] & ml)
2322 | (tib41[2] & mr);
2323 buf1[3] = tib41[3];
2324 break;
2325 case 7: buf1[3] = (buf1[3] & ml)
2326 | (tib41[3] & mr);
2327 break;
2328 }
2329
2330 out_len -= p1;
2331
2332 return out_len;
2333 }
2334
2335 static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2336 {
2337 if (p0 > in_len) return (in_len);
2338
2339 if ((in_len + 1) >= 32) return (in_len);
2340
2341 u32 out_len = in_len;
2342
2343 u32 tib40[4];
2344 u32 tib41[4];
2345
2346 rshift_block (buf0, buf1, tib40, tib41);
2347
2348 const u32 p1n = p1 << ((p0 & 3) * 8);
2349
2350 const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
2351
2352 const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
2353
2354 switch (p0 / 4)
2355 {
2356 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr);
2357 buf0[1] = tib40[1];
2358 buf0[2] = tib40[2];
2359 buf0[3] = tib40[3];
2360 buf1[0] = tib41[0];
2361 buf1[1] = tib41[1];
2362 buf1[2] = tib41[2];
2363 buf1[3] = tib41[3];
2364 break;
2365 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr);
2366 buf0[2] = tib40[2];
2367 buf0[3] = tib40[3];
2368 buf1[0] = tib41[0];
2369 buf1[1] = tib41[1];
2370 buf1[2] = tib41[2];
2371 buf1[3] = tib41[3];
2372 break;
2373 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr);
2374 buf0[3] = tib40[3];
2375 buf1[0] = tib41[0];
2376 buf1[1] = tib41[1];
2377 buf1[2] = tib41[2];
2378 buf1[3] = tib41[3];
2379 break;
2380 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr);
2381 buf1[0] = tib41[0];
2382 buf1[1] = tib41[1];
2383 buf1[2] = tib41[2];
2384 buf1[3] = tib41[3];
2385 break;
2386 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr);
2387 buf1[1] = tib41[1];
2388 buf1[2] = tib41[2];
2389 buf1[3] = tib41[3];
2390 break;
2391 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr);
2392 buf1[2] = tib41[2];
2393 buf1[3] = tib41[3];
2394 break;
2395 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr);
2396 buf1[3] = tib41[3];
2397 break;
2398 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr);
2399 break;
2400 }
2401
2402 out_len++;
2403
2404 return out_len;
2405 }
2406
2407 static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2408 {
2409 if (p0 >= in_len) return (in_len);
2410
2411 const u32 p1n = p1 << ((p0 & 3) * 8);
2412
2413 const u32 m = ~(0xffu << ((p0 & 3) * 8));
2414
2415 switch (p0 / 4)
2416 {
2417 case 0: buf0[0] = (buf0[0] & m) | p1n; break;
2418 case 1: buf0[1] = (buf0[1] & m) | p1n; break;
2419 case 2: buf0[2] = (buf0[2] & m) | p1n; break;
2420 case 3: buf0[3] = (buf0[3] & m) | p1n; break;
2421 case 4: buf1[0] = (buf1[0] & m) | p1n; break;
2422 case 5: buf1[1] = (buf1[1] & m) | p1n; break;
2423 case 6: buf1[2] = (buf1[2] & m) | p1n; break;
2424 case 7: buf1[3] = (buf1[3] & m) | p1n; break;
2425 }
2426
2427 return in_len;
2428 }
2429
2430 static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2431 {
2432 if (p0 >= in_len) return (in_len);
2433
2434 truncate_right (buf0, buf1, p0);
2435
2436 return p0;
2437 }
2438
2439 static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2440 {
2441 #ifdef IS_NV
2442 for (u32 i = 0; i < in_len; i++)
2443 {
2444 switch (i)
2445 {
2446 case 0: if ((__byte_perm (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7650);
2447 break;
2448 case 1: if ((__byte_perm (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7604);
2449 break;
2450 case 2: if ((__byte_perm (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7054);
2451 break;
2452 case 3: if ((__byte_perm (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x0654);
2453 break;
2454 case 4: if ((__byte_perm (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7650);
2455 break;
2456 case 5: if ((__byte_perm (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7604);
2457 break;
2458 case 6: if ((__byte_perm (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7054);
2459 break;
2460 case 7: if ((__byte_perm (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x0654);
2461 break;
2462 case 8: if ((__byte_perm (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7650);
2463 break;
2464 case 9: if ((__byte_perm (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7604);
2465 break;
2466 case 10: if ((__byte_perm (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7054);
2467 break;
2468 case 11: if ((__byte_perm (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x0654);
2469 break;
2470 case 12: if ((__byte_perm (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7650);
2471 break;
2472 case 13: if ((__byte_perm (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7604);
2473 break;
2474 case 14: if ((__byte_perm (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7054);
2475 break;
2476 case 15: if ((__byte_perm (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x0654);
2477 break;
2478 case 16: if ((__byte_perm (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7650);
2479 break;
2480 case 17: if ((__byte_perm (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7604);
2481 break;
2482 case 18: if ((__byte_perm (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7054);
2483 break;
2484 case 19: if ((__byte_perm (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x0654);
2485 break;
2486 case 20: if ((__byte_perm (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7650);
2487 break;
2488 case 21: if ((__byte_perm (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7604);
2489 break;
2490 case 22: if ((__byte_perm (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7054);
2491 break;
2492 case 23: if ((__byte_perm (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x0654);
2493 break;
2494 case 24: if ((__byte_perm (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7650);
2495 break;
2496 case 25: if ((__byte_perm (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7604);
2497 break;
2498 case 26: if ((__byte_perm (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7054);
2499 break;
2500 case 27: if ((__byte_perm (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x0654);
2501 break;
2502 case 28: if ((__byte_perm (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7650);
2503 break;
2504 case 29: if ((__byte_perm (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7604);
2505 break;
2506 case 30: if ((__byte_perm (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7054);
2507 break;
2508 case 31: if ((__byte_perm (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x0654);
2509 break;
2510 }
2511 }
2512 #endif
2513
2514 #if defined IS_AMD || defined IS_GENERIC
2515 const uchar4 tmp0 = (uchar4) (p0);
2516 const uchar4 tmp1 = (uchar4) (p1);
2517
2518 uchar4 tmp;
2519
2520 tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp);
2521 tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp);
2522 tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp);
2523 tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp);
2524 tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp);
2525 tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp);
2526 tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp);
2527 tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp);
2528 #endif
2529
2530 return in_len;
2531 }
2532
2533 static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2534 {
2535 // TODO
2536 return in_len;
2537 }
2538
2539 static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2540 {
2541 // TODO
2542 return in_len;
2543 }
2544
2545 static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2546 {
2547 if ( in_len == 0) return (in_len);
2548 if ((in_len + p0) >= 32) return (in_len);
2549
2550 u32 out_len = in_len;
2551
2552 const u32 tmp = buf0[0] & 0xFF;
2553
2554 rshift_block_N (buf0, buf1, buf0, buf1, p0);
2555
2556 #ifdef IS_NV
2557 switch (p0)
2558 {
2559 case 1: buf0[0] |= tmp;
2560 break;
2561 case 2: buf0[0] |= __byte_perm (tmp, 0, 0x5400);
2562 break;
2563 case 3: buf0[0] |= __byte_perm (tmp, 0, 0x4000);
2564 break;
2565 case 4: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2566 break;
2567 case 5: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2568 buf0[1] |= tmp;
2569 break;
2570 case 6: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2571 buf0[1] |= __byte_perm (tmp, 0, 0x5400);
2572 break;
2573 case 7: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2574 buf0[1] |= __byte_perm (tmp, 0, 0x4000);
2575 break;
2576 case 8: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2577 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2578 break;
2579 case 9: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2580 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2581 buf0[2] |= tmp;
2582 break;
2583 case 10: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2584 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2585 buf0[2] |= __byte_perm (tmp, 0, 0x5400);
2586 break;
2587 case 11: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2588 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2589 buf0[2] |= __byte_perm (tmp, 0, 0x4000);
2590 break;
2591 case 12: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2592 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2593 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2594 break;
2595 case 13: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2596 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2597 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2598 buf0[3] |= tmp;
2599 break;
2600 case 14: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2601 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2602 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2603 buf0[3] |= __byte_perm (tmp, 0, 0x5400);
2604 break;
2605 case 15: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2606 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2607 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2608 buf0[3] |= __byte_perm (tmp, 0, 0x4000);
2609 break;
2610 case 16: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2611 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2612 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2613 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2614 break;
2615 case 17: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2616 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2617 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2618 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2619 buf1[0] |= tmp;
2620 break;
2621 case 18: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2622 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2623 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2624 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2625 buf1[0] |= __byte_perm (tmp, 0, 0x5400);
2626 break;
2627 case 19: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2628 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2629 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2630 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2631 buf1[0] |= __byte_perm (tmp, 0, 0x4000);
2632 break;
2633 case 20: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2634 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2635 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2636 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2637 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2638 break;
2639 case 21: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2640 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2641 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2642 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2643 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2644 buf1[1] |= tmp;
2645 break;
2646 case 22: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2647 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2648 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2649 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2650 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2651 buf1[1] |= __byte_perm (tmp, 0, 0x5400);
2652 break;
2653 case 23: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2654 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2655 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2656 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2657 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2658 buf1[1] |= __byte_perm (tmp, 0, 0x4000);
2659 break;
2660 case 24: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2661 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2662 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2663 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2664 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2665 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2666 break;
2667 case 25: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2668 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2669 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2670 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2671 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2672 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2673 buf1[2] |= tmp;
2674 break;
2675 case 26: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2676 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2677 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2678 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2679 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2680 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2681 buf1[2] |= __byte_perm (tmp, 0, 0x5400);
2682 break;
2683 case 27: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2684 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2685 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2686 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2687 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2688 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2689 buf1[2] |= __byte_perm (tmp, 0, 0x4000);
2690 break;
2691 case 28: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2692 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2693 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2694 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2695 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2696 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2697 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2698 break;
2699 case 29: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2700 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2701 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2702 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2703 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2704 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2705 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2706 buf1[3] |= tmp;
2707 break;
2708 case 30: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2709 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2710 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2711 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2712 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2713 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2714 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2715 buf1[3] |= __byte_perm (tmp, 0, 0x5400);
2716 break;
2717 case 31: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
2718 buf0[1] |= __byte_perm (tmp, 0, 0x0000);
2719 buf0[2] |= __byte_perm (tmp, 0, 0x0000);
2720 buf0[3] |= __byte_perm (tmp, 0, 0x0000);
2721 buf1[0] |= __byte_perm (tmp, 0, 0x0000);
2722 buf1[1] |= __byte_perm (tmp, 0, 0x0000);
2723 buf1[2] |= __byte_perm (tmp, 0, 0x0000);
2724 buf1[3] |= __byte_perm (tmp, 0, 0x4000);
2725 break;
2726 }
2727 #endif
2728
2729 #if defined IS_AMD || defined IS_GENERIC
2730 switch (p0)
2731 {
2732 case 1: buf0[0] |= tmp << 0;
2733 break;
2734 case 2: buf0[0] |= tmp << 0 | tmp << 8;
2735 break;
2736 case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2737 break;
2738 case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2739 break;
2740 case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2741 buf0[1] |= tmp << 0;
2742 break;
2743 case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2744 buf0[1] |= tmp << 0 | tmp << 8;
2745 break;
2746 case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2747 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2748 break;
2749 case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2750 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2751 break;
2752 case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2753 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2754 buf0[2] |= tmp << 0;
2755 break;
2756 case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2757 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2758 buf0[2] |= tmp << 0 | tmp << 8;
2759 break;
2760 case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2761 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2762 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2763 break;
2764 case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2765 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2766 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2767 break;
2768 case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2769 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2770 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2771 buf0[3] |= tmp << 0;
2772 break;
2773 case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2774 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2775 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2776 buf0[3] |= tmp << 0 | tmp << 8;
2777 break;
2778 case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2779 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2780 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2781 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2782 break;
2783 case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2784 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2785 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2786 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2787 break;
2788 case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2789 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2790 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2791 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2792 buf1[0] |= tmp << 0;
2793 break;
2794 case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2795 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2796 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2797 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2798 buf1[0] |= tmp << 0 | tmp << 8;
2799 break;
2800 case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2801 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2802 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2803 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2804 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16;
2805 break;
2806 case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2807 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2808 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2809 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2810 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2811 break;
2812 case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2813 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2814 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2815 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2816 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2817 buf1[1] |= tmp << 0;
2818 break;
2819 case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2820 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2821 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2822 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2823 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2824 buf1[1] |= tmp << 0 | tmp << 8;
2825 break;
2826 case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2827 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2828 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2829 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2830 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2831 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16;
2832 break;
2833 case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2834 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2835 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2836 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2837 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2838 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2839 break;
2840 case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2841 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2842 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2843 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2844 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2845 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2846 buf1[2] |= tmp << 0;
2847 break;
2848 case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2849 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2850 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2851 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2852 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2853 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2854 buf1[2] |= tmp << 0 | tmp << 8;
2855 break;
2856 case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2857 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2858 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2859 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2860 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2861 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2862 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16;
2863 break;
2864 case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2865 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2866 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2867 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2868 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2869 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2870 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2871 break;
2872 case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2873 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2874 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2875 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2876 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2877 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2878 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2879 buf1[3] |= tmp << 0;
2880 break;
2881 case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2882 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2883 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2884 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2885 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2886 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2887 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2888 buf1[3] |= tmp << 0 | tmp << 8;
2889 break;
2890 case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2891 buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2892 buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2893 buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2894 buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2895 buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2896 buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24;
2897 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16;
2898 break;
2899 }
2900 #endif
2901
2902 out_len += p0;
2903
2904 return out_len;
2905 }
2906
2907 static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
2908 {
2909 if ( in_len == 0) return (in_len);
2910 if ((in_len + p0) >= 32) return (in_len);
2911
2912 const u32 in_len1 = in_len - 1;
2913
2914 const u32 sh = (in_len1 & 3) * 8;
2915
2916 u32 tmp = 0;
2917
2918 switch (in_len1 / 4)
2919 {
2920 case 0: tmp = (buf0[0] >> sh) & 0xff; break;
2921 case 1: tmp = (buf0[1] >> sh) & 0xff; break;
2922 case 2: tmp = (buf0[2] >> sh) & 0xff; break;
2923 case 3: tmp = (buf0[3] >> sh) & 0xff; break;
2924 case 4: tmp = (buf1[0] >> sh) & 0xff; break;
2925 case 5: tmp = (buf1[1] >> sh) & 0xff; break;
2926 case 6: tmp = (buf1[2] >> sh) & 0xff; break;
2927 case 7: tmp = (buf1[3] >> sh) & 0xff; break;
2928 }
2929
2930 u32 out_len = in_len;
2931
2932 for (u32 i = 0; i < p0; i++)
2933 {
2934 append_block1 (out_len, buf0, buf1, tmp);
2935
2936 out_len++;
2937 }
2938
2939 return out_len;
2940 }
2941
/**
 * Duplicate every character of the word in place: "abc" becomes "aabbcc".
 *
 * Only buf0[] is read as input; the doubled word is written across buf0[]
 * and buf1[]. p0 and p1 are not used.
 *
 * Returns in_len * 2. When the word is empty, or the doubled length would
 * be 32 or more, nothing is modified and in_len is returned.
 */
static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (in_len == 0) return (in_len);

  const u32 doubled_len = in_len + in_len;

  if (doubled_len >= 32) return (in_len);

  #ifdef IS_NV
  {
    // snapshot first: buf0[] is both the source and part of the destination
    const u32 src0 = buf0[0];
    const u32 src1 = buf0[1];
    const u32 src2 = buf0[2];
    const u32 src3 = buf0[3];

    // selector 0x1100 repeats bytes 0 and 1, selector 0x3322 repeats bytes 2 and 3
    buf0[0] = __byte_perm (src0, 0, 0x1100);
    buf0[1] = __byte_perm (src0, 0, 0x3322);
    buf0[2] = __byte_perm (src1, 0, 0x1100);
    buf0[3] = __byte_perm (src1, 0, 0x3322);
    buf1[0] = __byte_perm (src2, 0, 0x1100);
    buf1[1] = __byte_perm (src2, 0, 0x3322);
    buf1[2] = __byte_perm (src3, 0, 0x1100);
    buf1[3] = __byte_perm (src3, 0, 0x3322);
  }
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  {
    // snapshot first: buf0[] is both the source and part of the destination
    const u32 src0 = buf0[0];
    const u32 src1 = buf0[1];
    const u32 src2 = buf0[2];
    const u32 src3 = buf0[3];

    // step 1: move the two bytes of each half-word to byte lanes 0 and 2
    const u32 lo0 = (src0 & 0x000000ff) | ((src0 <<  8) & 0x00ff0000);
    const u32 hi0 = ((src0 >> 16) & 0x000000ff) | ((src0 >> 8) & 0x00ff0000);
    const u32 lo1 = (src1 & 0x000000ff) | ((src1 <<  8) & 0x00ff0000);
    const u32 hi1 = ((src1 >> 16) & 0x000000ff) | ((src1 >> 8) & 0x00ff0000);
    const u32 lo2 = (src2 & 0x000000ff) | ((src2 <<  8) & 0x00ff0000);
    const u32 hi2 = ((src2 >> 16) & 0x000000ff) | ((src2 >> 8) & 0x00ff0000);
    const u32 lo3 = (src3 & 0x000000ff) | ((src3 <<  8) & 0x00ff0000);
    const u32 hi3 = ((src3 >> 16) & 0x000000ff) | ((src3 >> 8) & 0x00ff0000);

    // step 2: copy each occupied lane into the (empty) lane right above it
    buf0[0] = lo0 | (lo0 << 8);
    buf0[1] = hi0 | (hi0 << 8);
    buf0[2] = lo1 | (lo1 << 8);
    buf0[3] = hi1 | (hi1 << 8);
    buf1[0] = lo2 | (lo2 << 8);
    buf1[1] = hi2 | (hi2 << 8);
    buf1[2] = lo3 | (lo3 << 8);
    buf1[3] = hi3 | (hi3 << 8);
  }
  #endif

  return doubled_len;
}
2996
/**
 * Swap the first two characters of the word (byte lanes 0 and 1 of buf0[0]).
 *
 * Words shorter than two characters are left untouched. p0, p1 and buf1 are
 * not used. The length never changes, so in_len is always returned.
 */
static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (in_len >= 2)
  {
    #ifdef IS_NV
    buf0[0] = __byte_perm (buf0[0], 0, 0x3201);
    #endif

    #if defined IS_AMD || defined IS_GENERIC
    {
      const u32 head = buf0[0];

      // upper half-word stays, the two low bytes trade places
      buf0[0] = (head & 0xffff0000)
              | ((head & 0x000000ff) << 8)
              | ((head & 0x0000ff00) >> 8);
    }
    #endif
  }

  return in_len;
}
3011
/**
 * Helper for rule_op_mangle_switch_last: both characters live in one word.
 *
 * Swaps byte lanes `lane` and `lane + 1` of `word` (lane is 0, 1 or 2),
 * keeps the lanes below the pair and clears the lanes above it.
 */
static u32 switch_last_within_word (const u32 word, const u32 lane)
{
  u32 out = word;

  #ifdef IS_NV
  switch (lane)
  {
    case 0: out = __byte_perm (out, 0, 0x5401); break;
    case 1: out = __byte_perm (out, 0, 0x4120); break;
    case 2: out = __byte_perm (out, 0, 0x2310); break;
  }
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  {
    const u32 sh = lane * 8;

    const u32 below = out & ((1u << sh) - 1);               // lanes before the pair
    const u32 up    = (out << 8) & (0x0000ff00u << sh);     // first char -> upper lane of the pair
    const u32 down  = (out >> 8) & (0x000000ffu << sh);     // second char -> lower lane of the pair

    out = below | up | down;
  }
  #endif

  return out;
}

/**
 * Helper for rule_op_mangle_switch_last: the pair straddles two words.
 *
 * Exchanges the top byte lane of *lo with the bottom byte lane of *hi.
 * Lanes 1 and 2 of *hi are dropped; they lie past the end of the word.
 */
static void switch_last_across_words (u32 *lo, u32 *hi)
{
  #ifdef IS_NV
  *hi = __byte_perm (*hi, *lo, 0x7210);
  *lo = __byte_perm (*lo, *hi, 0x4210);
  *hi = __byte_perm (*hi,   0, 0x6543);
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  *hi = (*lo & 0xff000000) | *hi;               // park the char leaving *lo in the top lane of *hi
  *lo = (*lo & 0x00ffffff) | (*hi << 24);       // bottom lane of *hi becomes top lane of *lo
  *hi = (*hi >> 24);                            // parked char drops to the bottom lane
  #endif
}

/**
 * Swap the last two characters of the word.
 *
 * Only lengths 2..31 are handled; any other length leaves the buffers
 * untouched. p0 and p1 are not used. The length never changes, so in_len is
 * always returned.
 */
static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if ((in_len < 2) || (in_len > 31)) return (in_len);

  const u32 first = in_len - 2;   // position of the second-to-last character
  const u32 lane  = first & 3;    // its byte lane inside the containing word

  if (lane == 3)
  {
    // the last character starts the following word
    switch (first / 4)
    {
      case 0: switch_last_across_words (&buf0[0], &buf0[1]); break;
      case 1: switch_last_across_words (&buf0[1], &buf0[2]); break;
      case 2: switch_last_across_words (&buf0[2], &buf0[3]); break;
      case 3: switch_last_across_words (&buf0[3], &buf1[0]); break;
      case 4: switch_last_across_words (&buf1[0], &buf1[1]); break;
      case 5: switch_last_across_words (&buf1[1], &buf1[2]); break;
      case 6: switch_last_across_words (&buf1[2], &buf1[3]); break;
    }
  }
  else
  {
    switch (first / 4)
    {
      case 0: buf0[0] = switch_last_within_word (buf0[0], lane); break;
      case 1: buf0[1] = switch_last_within_word (buf0[1], lane); break;
      case 2: buf0[2] = switch_last_within_word (buf0[2], lane); break;
      case 3: buf0[3] = switch_last_within_word (buf0[3], lane); break;
      case 4: buf1[0] = switch_last_within_word (buf1[0], lane); break;
      case 5: buf1[1] = switch_last_within_word (buf1[1], lane); break;
      case 6: buf1[2] = switch_last_within_word (buf1[2], lane); break;
      case 7: buf1[3] = switch_last_within_word (buf1[3], lane); break;
    }
  }

  return in_len;
}
3178
/**
 * Swap the characters at positions p0 and p1 of the word.
 *
 * Nothing happens when either position is at or beyond in_len. Positions
 * select a word with (p / 4) over buf0[0..3], buf1[0..3] and a byte lane
 * with (p & 3). The work is done in three steps whose order matters when
 * both positions fall into the same word:
 *   1. read the character at p0
 *   2. read the character at p1, then store the p0 character there
 *   3. store the p1 character at p0
 *
 * The length never changes, so in_len is always returned.
 */
static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if ((p0 >= in_len) || (p1 >= in_len)) return (in_len);

  u32 chr0 = 0;   // character found at p0
  u32 chr1 = 0;   // character found at p1

  #ifdef IS_NV
  {
    // 0x654N: move byte lane N to lane 0, zero-fill the rest (second operand is 0)
    const u32 get0 = 0x6540 | (p0 & 3);
    const u32 get1 = 0x6540 | (p1 & 3);

    // 0x7654 keeps the target word (second operand) as is; zeroing one
    // nibble pulls lane 0 of the first operand into that lane instead
    const u32 put0 = 0x7654 & ~(0xfu << ((p0 & 3) * 4));
    const u32 put1 = 0x7654 & ~(0xfu << ((p1 & 3) * 4));

    switch (p0 / 4)
    {
      case 0: chr0 = __byte_perm (buf0[0], 0, get0); break;
      case 1: chr0 = __byte_perm (buf0[1], 0, get0); break;
      case 2: chr0 = __byte_perm (buf0[2], 0, get0); break;
      case 3: chr0 = __byte_perm (buf0[3], 0, get0); break;
      case 4: chr0 = __byte_perm (buf1[0], 0, get0); break;
      case 5: chr0 = __byte_perm (buf1[1], 0, get0); break;
      case 6: chr0 = __byte_perm (buf1[2], 0, get0); break;
      case 7: chr0 = __byte_perm (buf1[3], 0, get0); break;
    }

    switch (p1 / 4)
    {
      case 0: chr1 = __byte_perm (buf0[0], 0, get1); buf0[0] = __byte_perm (chr0, buf0[0], put1); break;
      case 1: chr1 = __byte_perm (buf0[1], 0, get1); buf0[1] = __byte_perm (chr0, buf0[1], put1); break;
      case 2: chr1 = __byte_perm (buf0[2], 0, get1); buf0[2] = __byte_perm (chr0, buf0[2], put1); break;
      case 3: chr1 = __byte_perm (buf0[3], 0, get1); buf0[3] = __byte_perm (chr0, buf0[3], put1); break;
      case 4: chr1 = __byte_perm (buf1[0], 0, get1); buf1[0] = __byte_perm (chr0, buf1[0], put1); break;
      case 5: chr1 = __byte_perm (buf1[1], 0, get1); buf1[1] = __byte_perm (chr0, buf1[1], put1); break;
      case 6: chr1 = __byte_perm (buf1[2], 0, get1); buf1[2] = __byte_perm (chr0, buf1[2], put1); break;
      case 7: chr1 = __byte_perm (buf1[3], 0, get1); buf1[3] = __byte_perm (chr0, buf1[3], put1); break;
    }

    switch (p0 / 4)
    {
      case 0: buf0[0] = __byte_perm (chr1, buf0[0], put0); break;
      case 1: buf0[1] = __byte_perm (chr1, buf0[1], put0); break;
      case 2: buf0[2] = __byte_perm (chr1, buf0[2], put0); break;
      case 3: buf0[3] = __byte_perm (chr1, buf0[3], put0); break;
      case 4: buf1[0] = __byte_perm (chr1, buf1[0], put0); break;
      case 5: buf1[1] = __byte_perm (chr1, buf1[1], put0); break;
      case 6: buf1[2] = __byte_perm (chr1, buf1[2], put0); break;
      case 7: buf1[3] = __byte_perm (chr1, buf1[3], put0); break;
    }
  }
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  {
    const u32 sh0 = (p0 & 3) * 8;       // bit offset of the p0 lane
    const u32 sh1 = (p1 & 3) * 8;       // bit offset of the p1 lane

    const u32 keep0 = ~(0xffu << sh0);  // everything but the p0 lane
    const u32 keep1 = ~(0xffu << sh1);  // everything but the p1 lane

    switch (p0 / 4)
    {
      case 0: chr0 = (buf0[0] >> sh0) & 0xff; break;
      case 1: chr0 = (buf0[1] >> sh0) & 0xff; break;
      case 2: chr0 = (buf0[2] >> sh0) & 0xff; break;
      case 3: chr0 = (buf0[3] >> sh0) & 0xff; break;
      case 4: chr0 = (buf1[0] >> sh0) & 0xff; break;
      case 5: chr0 = (buf1[1] >> sh0) & 0xff; break;
      case 6: chr0 = (buf1[2] >> sh0) & 0xff; break;
      case 7: chr0 = (buf1[3] >> sh0) & 0xff; break;
    }

    switch (p1 / 4)
    {
      case 0: chr1 = (buf0[0] >> sh1) & 0xff; buf0[0] = (buf0[0] & keep1) | (chr0 << sh1); break;
      case 1: chr1 = (buf0[1] >> sh1) & 0xff; buf0[1] = (buf0[1] & keep1) | (chr0 << sh1); break;
      case 2: chr1 = (buf0[2] >> sh1) & 0xff; buf0[2] = (buf0[2] & keep1) | (chr0 << sh1); break;
      case 3: chr1 = (buf0[3] >> sh1) & 0xff; buf0[3] = (buf0[3] & keep1) | (chr0 << sh1); break;
      case 4: chr1 = (buf1[0] >> sh1) & 0xff; buf1[0] = (buf1[0] & keep1) | (chr0 << sh1); break;
      case 5: chr1 = (buf1[1] >> sh1) & 0xff; buf1[1] = (buf1[1] & keep1) | (chr0 << sh1); break;
      case 6: chr1 = (buf1[2] >> sh1) & 0xff; buf1[2] = (buf1[2] & keep1) | (chr0 << sh1); break;
      case 7: chr1 = (buf1[3] >> sh1) & 0xff; buf1[3] = (buf1[3] & keep1) | (chr0 << sh1); break;
    }

    switch (p0 / 4)
    {
      case 0: buf0[0] = (buf0[0] & keep0) | (chr1 << sh0); break;
      case 1: buf0[1] = (buf0[1] & keep0) | (chr1 << sh0); break;
      case 2: buf0[2] = (buf0[2] & keep0) | (chr1 << sh0); break;
      case 3: buf0[3] = (buf0[3] & keep0) | (chr1 << sh0); break;
      case 4: buf1[0] = (buf1[0] & keep0) | (chr1 << sh0); break;
      case 5: buf1[1] = (buf1[1] & keep0) | (chr1 << sh0); break;
      case 6: buf1[2] = (buf1[2] & keep0) | (chr1 << sh0); break;
      case 7: buf1[3] = (buf1[3] & keep0) | (chr1 << sh0); break;
    }
  }
  #endif

  return in_len;
}
3665
/**
 * Helper for rule_op_mangle_chr_shiftl: returns `word` with the byte that
 * starts at bit offset `lane` shifted left by one bit. The bit shifted out
 * of the byte is dropped; the other three bytes are returned unchanged.
 */
static u32 chr_shiftl_in_word (const u32 word, const u32 lane)
{
  const u32 chr = (word >> lane) & 0xff;
  const u32 res = (chr << 1) & 0xff;

  return (word & ~(0xffu << lane)) | (res << lane);
}

/**
 * Shift the character at position p0 left by one bit (the result wraps to
 * 8 bits, neighbouring characters are not affected).
 *
 * Nothing happens when p0 is at or beyond in_len. p1 is not used. The
 * length never changes, so in_len is always returned.
 */
static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  const u32 lane = (p0 & 3) * 8;  // bit offset of the character inside its word

  switch (p0 / 4)
  {
    case 0: buf0[0] = chr_shiftl_in_word (buf0[0], lane); break;
    case 1: buf0[1] = chr_shiftl_in_word (buf0[1], lane); break;
    case 2: buf0[2] = chr_shiftl_in_word (buf0[2], lane); break;
    case 3: buf0[3] = chr_shiftl_in_word (buf0[3], lane); break;
    case 4: buf1[0] = chr_shiftl_in_word (buf1[0], lane); break;
    case 5: buf1[1] = chr_shiftl_in_word (buf1[1], lane); break;
    case 6: buf1[2] = chr_shiftl_in_word (buf1[2], lane); break;
    case 7: buf1[3] = chr_shiftl_in_word (buf1[3], lane); break;
  }

  return in_len;
}
3687
/**
 * Helper for rule_op_mangle_chr_shiftr: returns `word` with the byte that
 * starts at bit offset `lane` shifted right by one bit. The bit shifted out
 * of the byte is dropped; the other three bytes are returned unchanged.
 */
static u32 chr_shiftr_in_word (const u32 word, const u32 lane)
{
  const u32 chr = (word >> lane) & 0xff;
  const u32 res = chr >> 1;

  return (word & ~(0xffu << lane)) | (res << lane);
}

/**
 * Shift the character at position p0 right by one bit (neighbouring
 * characters are not affected).
 *
 * Nothing happens when p0 is at or beyond in_len. p1 is not used. The
 * length never changes, so in_len is always returned.
 */
static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  const u32 lane = (p0 & 3) * 8;  // bit offset of the character inside its word

  switch (p0 / 4)
  {
    case 0: buf0[0] = chr_shiftr_in_word (buf0[0], lane); break;
    case 1: buf0[1] = chr_shiftr_in_word (buf0[1], lane); break;
    case 2: buf0[2] = chr_shiftr_in_word (buf0[2], lane); break;
    case 3: buf0[3] = chr_shiftr_in_word (buf0[3], lane); break;
    case 4: buf1[0] = chr_shiftr_in_word (buf1[0], lane); break;
    case 5: buf1[1] = chr_shiftr_in_word (buf1[1], lane); break;
    case 6: buf1[2] = chr_shiftr_in_word (buf1[2], lane); break;
    case 7: buf1[3] = chr_shiftr_in_word (buf1[3], lane); break;
  }

  return in_len;
}
3709
/**
 * Helper for rule_op_mangle_chr_incr: returns `word` with the byte that
 * starts at bit offset `lane` incremented by one. 0xff wraps to 0x00
 * without carrying into the next byte.
 */
static u32 chr_incr_in_word (const u32 word, const u32 lane)
{
  const u32 chr = (word >> lane) & 0xff;
  const u32 res = (chr + 1) & 0xff;

  return (word & ~(0xffu << lane)) | (res << lane);
}

/**
 * Increment the character at position p0 by one (8-bit wrap-around,
 * neighbouring characters are not affected).
 *
 * Nothing happens when p0 is at or beyond in_len. p1 is not used. The
 * length never changes, so in_len is always returned.
 */
static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  const u32 lane = (p0 & 3) * 8;  // bit offset of the character inside its word

  switch (p0 / 4)
  {
    case 0: buf0[0] = chr_incr_in_word (buf0[0], lane); break;
    case 1: buf0[1] = chr_incr_in_word (buf0[1], lane); break;
    case 2: buf0[2] = chr_incr_in_word (buf0[2], lane); break;
    case 3: buf0[3] = chr_incr_in_word (buf0[3], lane); break;
    case 4: buf1[0] = chr_incr_in_word (buf1[0], lane); break;
    case 5: buf1[1] = chr_incr_in_word (buf1[1], lane); break;
    case 6: buf1[2] = chr_incr_in_word (buf1[2], lane); break;
    case 7: buf1[3] = chr_incr_in_word (buf1[3], lane); break;
  }

  return in_len;
}
3733
/**
 * Helper for rule_op_mangle_chr_decr: returns `word` with the byte that
 * starts at bit offset `lane` decremented by one. 0x00 wraps to 0xff
 * without borrowing from the next byte.
 */
static u32 chr_decr_in_word (const u32 word, const u32 lane)
{
  const u32 chr = (word >> lane) & 0xff;
  const u32 res = (chr - 1) & 0xff;

  return (word & ~(0xffu << lane)) | (res << lane);
}

/**
 * Decrement the character at position p0 by one (8-bit wrap-around,
 * neighbouring characters are not affected).
 *
 * Nothing happens when p0 is at or beyond in_len. p1 is not used. The
 * length never changes, so in_len is always returned.
 */
static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
  if (p0 >= in_len) return (in_len);

  const u32 lane = (p0 & 3) * 8;  // bit offset of the character inside its word

  switch (p0 / 4)
  {
    case 0: buf0[0] = chr_decr_in_word (buf0[0], lane); break;
    case 1: buf0[1] = chr_decr_in_word (buf0[1], lane); break;
    case 2: buf0[2] = chr_decr_in_word (buf0[2], lane); break;
    case 3: buf0[3] = chr_decr_in_word (buf0[3], lane); break;
    case 4: buf1[0] = chr_decr_in_word (buf1[0], lane); break;
    case 5: buf1[1] = chr_decr_in_word (buf1[1], lane); break;
    case 6: buf1[2] = chr_decr_in_word (buf1[2], lane); break;
    case 7: buf1[3] = chr_decr_in_word (buf1[3], lane); break;
  }

  return in_len;
}
3757
3758 static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3759 {
3760 if ((p0 + 1) >= in_len) return (in_len);
3761
3762 u32 tib40[4];
3763 u32 tib41[4];
3764
3765 lshift_block (buf0, buf1, tib40, tib41);
3766
3767 const u32 mr = 0xffu << ((p0 & 3) * 8);
3768 const u32 ml = ~mr;
3769
3770 switch (p0 / 4)
3771 {
3772 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3773 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3774 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3775 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3776 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3777 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3778 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3779 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
3780 }
3781
3782 return in_len;
3783 }
3784
3785 static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3786 {
3787 if (p0 == 0) return (in_len);
3788
3789 if (p0 >= in_len) return (in_len);
3790
3791 u32 tib40[4];
3792 u32 tib41[4];
3793
3794 rshift_block (buf0, buf1, tib40, tib41);
3795
3796 const u32 mr = 0xffu << ((p0 & 3) * 8);
3797 const u32 ml = ~mr;
3798
3799 switch (p0 / 4)
3800 {
3801 case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break;
3802 case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break;
3803 case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break;
3804 case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break;
3805 case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break;
3806 case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break;
3807 case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break;
3808 case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break;
3809 }
3810
3811 return in_len;
3812 }
3813
3814 static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3815 {
3816 if (p0 > in_len) return (in_len);
3817
3818 if ((in_len + p0) >= 32) return (in_len);
3819
3820 u32 out_len = in_len;
3821
3822 u32 tib40[4];
3823 u32 tib41[4];
3824
3825 tib40[0] = buf0[0];
3826 tib40[1] = buf0[1];
3827 tib40[2] = buf0[2];
3828 tib40[3] = buf0[3];
3829 tib41[0] = buf1[0];
3830 tib41[1] = buf1[1];
3831 tib41[2] = buf1[2];
3832 tib41[3] = buf1[3];
3833
3834 truncate_right (tib40, tib41, p0);
3835
3836 rshift_block_N (buf0, buf1, buf0, buf1, p0);
3837
3838 buf0[0] |= tib40[0];
3839 buf0[1] |= tib40[1];
3840 buf0[2] |= tib40[2];
3841 buf0[3] |= tib40[3];
3842 buf1[0] |= tib41[0];
3843 buf1[1] |= tib41[1];
3844 buf1[2] |= tib41[2];
3845 buf1[3] |= tib41[3];
3846
3847 out_len += p0;
3848
3849 return out_len;
3850 }
3851
3852 static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3853 {
3854 if (p0 > in_len) return (in_len);
3855
3856 if ((in_len + p0) >= 32) return (in_len);
3857
3858 u32 out_len = in_len;
3859
3860 u32 tib40[4];
3861 u32 tib41[4];
3862
3863 rshift_block_N (buf0, buf1, tib40, tib41, p0);
3864
3865 truncate_left (tib40, tib41, out_len);
3866
3867 buf0[0] |= tib40[0];
3868 buf0[1] |= tib40[1];
3869 buf0[2] |= tib40[2];
3870 buf0[3] |= tib40[3];
3871 buf1[0] |= tib41[0];
3872 buf1[1] |= tib41[1];
3873 buf1[2] |= tib41[2];
3874 buf1[3] |= tib41[3];
3875
3876 out_len += p0;
3877
3878 return out_len;
3879 }
3880
3881 static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
3882 {
3883 buf0[0] |= (generate_cmask (buf0[0]));
3884 buf0[1] |= (generate_cmask (buf0[1]));
3885 buf0[2] |= (generate_cmask (buf0[2]));
3886 buf0[3] |= (generate_cmask (buf0[3]));
3887 buf1[0] |= (generate_cmask (buf1[0]));
3888 buf1[1] |= (generate_cmask (buf1[1]));
3889 buf1[2] |= (generate_cmask (buf1[2]));
3890 buf1[3] |= (generate_cmask (buf1[3]));
3891
3892 #ifdef IS_NV
3893 buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0]));
3894
3895 for (u32 i = 0; i < in_len; i++)
3896 {
3897 u32 tmp0;
3898 u32 tmp1;
3899
3900 switch (i)
3901 {
3902 case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540);
3903 tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break;
3904 case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541);
3905 tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break;
3906 case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542);
3907 tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break;
3908 case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543);
3909 tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break;
3910 case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540);
3911 tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break;
3912 case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541);
3913 tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break;
3914 case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542);
3915 tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break;
3916 case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543);
3917 tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break;
3918 case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540);
3919 tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break;
3920 case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541);
3921 tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break;
3922 case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542);
3923 tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break;
3924 case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543);
3925 tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break;
3926 case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540);
3927 tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break;
3928 case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541);
3929 tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break;
3930 case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542);
3931 tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break;
3932 case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543);
3933 tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break;
3934 case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540);
3935 tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break;
3936 case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541);
3937 tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break;
3938 case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542);
3939 tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break;
3940 case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543);
3941 tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break;
3942 case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540);
3943 tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break;
3944 case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541);
3945 tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break;
3946 case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542);
3947 tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break;
3948 case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543);
3949 tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break;
3950 case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540);
3951 tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break;
3952 case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541);
3953 tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break;
3954 case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542);
3955 tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break;
3956 case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543);
3957 tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break;
3958 case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540);
3959 tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break;
3960 case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541);
3961 tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break;
3962 case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542);
3963 tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break;
3964 }
3965
3966 if (i < 3)
3967 {
3968 if (tmp0 == ' ') buf0[0] &= tmp1 ;
3969 }
3970 else if (i < 7)
3971 {
3972 if (tmp0 == ' ') buf0[1] &= tmp1 ;
3973 }
3974 else if (i < 11)
3975 {
3976 if (tmp0 == ' ') buf0[2] &= tmp1 ;
3977 }
3978 else if (i < 15)
3979 {
3980 if (tmp0 == ' ') buf0[3] &= tmp1 ;
3981 }
3982 else if (i < 19)
3983 {
3984 if (tmp0 == ' ') buf1[0] &= tmp1 ;
3985 }
3986 else if (i < 23)
3987 {
3988 if (tmp0 == ' ') buf1[1] &= tmp1 ;
3989 }
3990 else if (i < 27)
3991 {
3992 if (tmp0 == ' ') buf1[2] &= tmp1 ;
3993 }
3994 else if (i < 31)
3995 {
3996 if (tmp0 == ' ') buf1[3] &= tmp1 ;
3997 }
3998 }
3999 #endif
4000
4001 #if defined IS_AMD || defined IS_GENERIC
4002 u32 tib40[4];
4003 u32 tib41[4];
4004
4005 const uchar4 tmp0 = (uchar4) (' ');
4006 const uchar4 tmp1 = (uchar4) (0x00);
4007 const uchar4 tmp2 = (uchar4) (0xff);
4008
4009 uchar4 tmp;
4010
4011 tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp);
4012 tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp);
4013 tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp);
4014 tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp);
4015 tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp);
4016 tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp);
4017 tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp);
4018 tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp);
4019
4020 rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff;
4021
4022 buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]);
4023 buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]);
4024 buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]);
4025 buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]);
4026 buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]);
4027 buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]);
4028 buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]);
4029 buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]);
4030 #endif
4031
4032 return in_len;
4033 }
4034
4035 u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
4036 {
4037 u32 out_len = in_len;
4038
4039 switch (name)
4040 {
4041 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
4042 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break;
4043 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break;
4044 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
4045 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
4046 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
4047 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
4048 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
4049 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;
4050 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break;
4051 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break;
4052 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break;
4053 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break;
4054 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break;
4055 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break;
4056 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break;
4057 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break;
4058 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break;
4059 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break;
4060 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break;
4061 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break;
4062 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break;
4063 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break;
4064 //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break;
4065 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break;
4066 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break;
4067 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break;
4068 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break;
4069 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break;
4070 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break;
4071 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break;
4072 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break;
4073 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break;
4074 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break;
4075 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break;
4076 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break;
4077 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break;
4078 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break;
4079 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break;
4080 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break;
4081 }
4082
4083 return out_len;
4084 }
4085
4086 u32 apply_rules (__global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
4087 {
4088 u32 out_len = len;
4089
4090 for (u32 i = 0; cmds[i] != 0; i++)
4091 {
4092 const u32 cmd = cmds[i];
4093
4094 const u32 name = (cmd >> 0) & 0xff;
4095 const u32 p0 = (cmd >> 8) & 0xff;
4096 const u32 p1 = (cmd >> 16) & 0xff;
4097
4098 out_len = apply_rule (name, p0, p1, buf0, buf1, out_len);
4099 }
4100
4101 return out_len;
4102 }