Prepare new SIMD code for kernel, -m 0, 10, 20, 1000 should work in -a 3 mode and...
[hashcat.git] / OpenCL / common.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 static int hash_comp (const u32 d1[4], __global u32 *d2)
7 {
8 if (d1[3] > d2[DGST_R3]) return ( 1);
9 if (d1[3] < d2[DGST_R3]) return (-1);
10 if (d1[2] > d2[DGST_R2]) return ( 1);
11 if (d1[2] < d2[DGST_R2]) return (-1);
12 if (d1[1] > d2[DGST_R1]) return ( 1);
13 if (d1[1] < d2[DGST_R1]) return (-1);
14 if (d1[0] > d2[DGST_R0]) return ( 1);
15 if (d1[0] < d2[DGST_R0]) return (-1);
16
17 return (0);
18 }
19
20 static int find_hash (const u32 digest[4], const u32 digests_cnt, __global digest_t *digests_buf)
21 {
22 for (u32 l = 0, r = digests_cnt; r; r >>= 1)
23 {
24 const u32 m = r >> 1;
25
26 const u32 c = l + m;
27
28 const int cmp = hash_comp (digest, digests_buf[c].digest_buf);
29
30 if (cmp > 0)
31 {
32 l += m + 1;
33
34 r--;
35 }
36
37 if (cmp == 0) return (c);
38 }
39
40 return (-1);
41 }
42
43 static u32 check_bitmap (__global u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest)
44 {
45 return (bitmap[(digest >> bitmap_shift) & bitmap_mask] & (1 << (digest & 0x1f)));
46 }
47
48 static u32 check (const u32 digest[2], __global u32 *bitmap_s1_a, __global u32 *bitmap_s1_b, __global u32 *bitmap_s1_c, __global u32 *bitmap_s1_d, __global u32 *bitmap_s2_a, __global u32 *bitmap_s2_b, __global u32 *bitmap_s2_c, __global u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2)
49 {
50 if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0);
51 if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0);
52 if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0);
53 if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0);
54
55 if (check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0) return (0);
56 if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0);
57 if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0);
58 if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0);
59
60 return (1);
61 }
62
63 static void mark_hash (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos)
64 {
65 hashes_shown[hash_pos] = 1;
66
67 plains_buf[hash_pos].gidvid = (gid * 1) + 0;
68 plains_buf[hash_pos].il_pos = il_pos;
69 }
70
71 static void truncate_block (u32 w[4], const u32 len)
72 {
73 switch (len)
74 {
75 case 0: w[0] &= 0;
76 w[1] &= 0;
77 w[2] &= 0;
78 w[3] &= 0;
79 break;
80 case 1: w[0] &= 0x000000FF;
81 w[1] &= 0;
82 w[2] &= 0;
83 w[3] &= 0;
84 break;
85 case 2: w[0] &= 0x0000FFFF;
86 w[1] &= 0;
87 w[2] &= 0;
88 w[3] &= 0;
89 break;
90 case 3: w[0] &= 0x00FFFFFF;
91 w[1] &= 0;
92 w[2] &= 0;
93 w[3] &= 0;
94 break;
95 case 4: w[1] &= 0;
96 w[2] &= 0;
97 w[3] &= 0;
98 break;
99 case 5: w[1] &= 0x000000FF;
100 w[2] &= 0;
101 w[3] &= 0;
102 break;
103 case 6: w[1] &= 0x0000FFFF;
104 w[2] &= 0;
105 w[3] &= 0;
106 break;
107 case 7: w[1] &= 0x00FFFFFF;
108 w[2] &= 0;
109 w[3] &= 0;
110 break;
111 case 8: w[2] &= 0;
112 w[3] &= 0;
113 break;
114 case 9: w[2] &= 0x000000FF;
115 w[3] &= 0;
116 break;
117 case 10: w[2] &= 0x0000FFFF;
118 w[3] &= 0;
119 break;
120 case 11: w[2] &= 0x00FFFFFF;
121 w[3] &= 0;
122 break;
123 case 12: w[3] &= 0;
124 break;
125 case 13: w[3] &= 0x000000FF;
126 break;
127 case 14: w[3] &= 0x0000FFFF;
128 break;
129 case 15: w[3] &= 0x00FFFFFF;
130 break;
131 }
132 }
133
134 static void make_unicode_S (const u32 in[4], u32 out1[4], u32 out2[4])
135 {
136 #ifdef IS_NV
137 out2[3] = __byte_perm_S (in[3], 0, 0x7372);
138 out2[2] = __byte_perm_S (in[3], 0, 0x7170);
139 out2[1] = __byte_perm_S (in[2], 0, 0x7372);
140 out2[0] = __byte_perm_S (in[2], 0, 0x7170);
141 out1[3] = __byte_perm_S (in[1], 0, 0x7372);
142 out1[2] = __byte_perm_S (in[1], 0, 0x7170);
143 out1[1] = __byte_perm_S (in[0], 0, 0x7372);
144 out1[0] = __byte_perm_S (in[0], 0, 0x7170);
145 #endif
146
147 #if defined IS_AMD || defined IS_GENERIC
148 out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
149 out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
150 out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
151 out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF);
152 out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF);
153 out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF);
154 out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);
155 out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);
156 #endif
157 }
158
159 static void make_unicode (const u32x in[4], u32x out1[4], u32x out2[4])
160 {
161 #ifdef IS_NV
162 out2[3] = __byte_perm (in[3], 0, 0x7372);
163 out2[2] = __byte_perm (in[3], 0, 0x7170);
164 out2[1] = __byte_perm (in[2], 0, 0x7372);
165 out2[0] = __byte_perm (in[2], 0, 0x7170);
166 out1[3] = __byte_perm (in[1], 0, 0x7372);
167 out1[2] = __byte_perm (in[1], 0, 0x7170);
168 out1[1] = __byte_perm (in[0], 0, 0x7372);
169 out1[0] = __byte_perm (in[0], 0, 0x7170);
170 #endif
171
172 #if defined IS_AMD || defined IS_GENERIC
173 out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
174 out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
175 out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
176 out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF);
177 out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF);
178 out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF);
179 out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);
180 out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);
181 #endif
182 }
183
184 static void undo_unicode_S (const u32 in1[4], const u32 in2[4], u32 out[4])
185 {
186 #ifdef IS_NV
187 out[0] = __byte_perm_S (in1[0], in1[1], 0x6420);
188 out[1] = __byte_perm_S (in1[2], in1[3], 0x6420);
189 out[2] = __byte_perm_S (in2[0], in2[1], 0x6420);
190 out[3] = __byte_perm_S (in2[2], in2[3], 0x6420);
191 #endif
192
193 #if defined IS_AMD || defined IS_GENERIC
194 out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
195 | ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
196 out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
197 | ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8);
198 out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8)
199 | ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8);
200 out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8)
201 | ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8);
202 #endif
203 }
204
205 static void undo_unicode (const u32x in1[4], const u32x in2[4], u32x out[4])
206 {
207 #ifdef IS_NV
208 out[0] = __byte_perm (in1[0], in1[1], 0x6420);
209 out[1] = __byte_perm (in1[2], in1[3], 0x6420);
210 out[2] = __byte_perm (in2[0], in2[1], 0x6420);
211 out[3] = __byte_perm (in2[2], in2[3], 0x6420);
212 #endif
213
214 #if defined IS_AMD || defined IS_GENERIC
215 out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
216 | ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
217 out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
218 | ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8);
219 out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8)
220 | ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8);
221 out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8)
222 | ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8);
223 #endif
224 }
225
226 static void append_0x01_1x4 (u32 w0[4], const u32 offset)
227 {
228 switch (offset)
229 {
230 case 0:
231 w0[0] = 0x01;
232 break;
233
234 case 1:
235 w0[0] = w0[0] | 0x0100;
236 break;
237
238 case 2:
239 w0[0] = w0[0] | 0x010000;
240 break;
241
242 case 3:
243 w0[0] = w0[0] | 0x01000000;
244 break;
245
246 case 4:
247 w0[1] = 0x01;
248 break;
249
250 case 5:
251 w0[1] = w0[1] | 0x0100;
252 break;
253
254 case 6:
255 w0[1] = w0[1] | 0x010000;
256 break;
257
258 case 7:
259 w0[1] = w0[1] | 0x01000000;
260 break;
261
262 case 8:
263 w0[2] = 0x01;
264 break;
265
266 case 9:
267 w0[2] = w0[2] | 0x0100;
268 break;
269
270 case 10:
271 w0[2] = w0[2] | 0x010000;
272 break;
273
274 case 11:
275 w0[2] = w0[2] | 0x01000000;
276 break;
277
278 case 12:
279 w0[3] = 0x01;
280 break;
281
282 case 13:
283 w0[3] = w0[3] | 0x0100;
284 break;
285
286 case 14:
287 w0[3] = w0[3] | 0x010000;
288 break;
289
290 case 15:
291 w0[3] = w0[3] | 0x01000000;
292 break;
293 }
294 }
295
296 static void append_0x01_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
297 {
298 switch (offset)
299 {
300 case 0:
301 w0[0] = 0x01;
302 break;
303
304 case 1:
305 w0[0] = w0[0] | 0x0100;
306 break;
307
308 case 2:
309 w0[0] = w0[0] | 0x010000;
310 break;
311
312 case 3:
313 w0[0] = w0[0] | 0x01000000;
314 break;
315
316 case 4:
317 w0[1] = 0x01;
318 break;
319
320 case 5:
321 w0[1] = w0[1] | 0x0100;
322 break;
323
324 case 6:
325 w0[1] = w0[1] | 0x010000;
326 break;
327
328 case 7:
329 w0[1] = w0[1] | 0x01000000;
330 break;
331
332 case 8:
333 w0[2] = 0x01;
334 break;
335
336 case 9:
337 w0[2] = w0[2] | 0x0100;
338 break;
339
340 case 10:
341 w0[2] = w0[2] | 0x010000;
342 break;
343
344 case 11:
345 w0[2] = w0[2] | 0x01000000;
346 break;
347
348 case 12:
349 w0[3] = 0x01;
350 break;
351
352 case 13:
353 w0[3] = w0[3] | 0x0100;
354 break;
355
356 case 14:
357 w0[3] = w0[3] | 0x010000;
358 break;
359
360 case 15:
361 w0[3] = w0[3] | 0x01000000;
362 break;
363
364 case 16:
365 w1[0] = 0x01;
366 break;
367
368 case 17:
369 w1[0] = w1[0] | 0x0100;
370 break;
371
372 case 18:
373 w1[0] = w1[0] | 0x010000;
374 break;
375
376 case 19:
377 w1[0] = w1[0] | 0x01000000;
378 break;
379
380 case 20:
381 w1[1] = 0x01;
382 break;
383
384 case 21:
385 w1[1] = w1[1] | 0x0100;
386 break;
387
388 case 22:
389 w1[1] = w1[1] | 0x010000;
390 break;
391
392 case 23:
393 w1[1] = w1[1] | 0x01000000;
394 break;
395
396 case 24:
397 w1[2] = 0x01;
398 break;
399
400 case 25:
401 w1[2] = w1[2] | 0x0100;
402 break;
403
404 case 26:
405 w1[2] = w1[2] | 0x010000;
406 break;
407
408 case 27:
409 w1[2] = w1[2] | 0x01000000;
410 break;
411
412 case 28:
413 w1[3] = 0x01;
414 break;
415
416 case 29:
417 w1[3] = w1[3] | 0x0100;
418 break;
419
420 case 30:
421 w1[3] = w1[3] | 0x010000;
422 break;
423
424 case 31:
425 w1[3] = w1[3] | 0x01000000;
426 break;
427 }
428 }
429
430 static void append_0x01_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
431 {
432 switch (offset)
433 {
434 case 0:
435 w0[0] = 0x01;
436 break;
437
438 case 1:
439 w0[0] = w0[0] | 0x0100;
440 break;
441
442 case 2:
443 w0[0] = w0[0] | 0x010000;
444 break;
445
446 case 3:
447 w0[0] = w0[0] | 0x01000000;
448 break;
449
450 case 4:
451 w0[1] = 0x01;
452 break;
453
454 case 5:
455 w0[1] = w0[1] | 0x0100;
456 break;
457
458 case 6:
459 w0[1] = w0[1] | 0x010000;
460 break;
461
462 case 7:
463 w0[1] = w0[1] | 0x01000000;
464 break;
465
466 case 8:
467 w0[2] = 0x01;
468 break;
469
470 case 9:
471 w0[2] = w0[2] | 0x0100;
472 break;
473
474 case 10:
475 w0[2] = w0[2] | 0x010000;
476 break;
477
478 case 11:
479 w0[2] = w0[2] | 0x01000000;
480 break;
481
482 case 12:
483 w0[3] = 0x01;
484 break;
485
486 case 13:
487 w0[3] = w0[3] | 0x0100;
488 break;
489
490 case 14:
491 w0[3] = w0[3] | 0x010000;
492 break;
493
494 case 15:
495 w0[3] = w0[3] | 0x01000000;
496 break;
497
498 case 16:
499 w1[0] = 0x01;
500 break;
501
502 case 17:
503 w1[0] = w1[0] | 0x0100;
504 break;
505
506 case 18:
507 w1[0] = w1[0] | 0x010000;
508 break;
509
510 case 19:
511 w1[0] = w1[0] | 0x01000000;
512 break;
513
514 case 20:
515 w1[1] = 0x01;
516 break;
517
518 case 21:
519 w1[1] = w1[1] | 0x0100;
520 break;
521
522 case 22:
523 w1[1] = w1[1] | 0x010000;
524 break;
525
526 case 23:
527 w1[1] = w1[1] | 0x01000000;
528 break;
529
530 case 24:
531 w1[2] = 0x01;
532 break;
533
534 case 25:
535 w1[2] = w1[2] | 0x0100;
536 break;
537
538 case 26:
539 w1[2] = w1[2] | 0x010000;
540 break;
541
542 case 27:
543 w1[2] = w1[2] | 0x01000000;
544 break;
545
546 case 28:
547 w1[3] = 0x01;
548 break;
549
550 case 29:
551 w1[3] = w1[3] | 0x0100;
552 break;
553
554 case 30:
555 w1[3] = w1[3] | 0x010000;
556 break;
557
558 case 31:
559 w1[3] = w1[3] | 0x01000000;
560 break;
561
562 case 32:
563 w2[0] = 0x01;
564 break;
565
566 case 33:
567 w2[0] = w2[0] | 0x0100;
568 break;
569
570 case 34:
571 w2[0] = w2[0] | 0x010000;
572 break;
573
574 case 35:
575 w2[0] = w2[0] | 0x01000000;
576 break;
577
578 case 36:
579 w2[1] = 0x01;
580 break;
581
582 case 37:
583 w2[1] = w2[1] | 0x0100;
584 break;
585
586 case 38:
587 w2[1] = w2[1] | 0x010000;
588 break;
589
590 case 39:
591 w2[1] = w2[1] | 0x01000000;
592 break;
593
594 case 40:
595 w2[2] = 0x01;
596 break;
597
598 case 41:
599 w2[2] = w2[2] | 0x0100;
600 break;
601
602 case 42:
603 w2[2] = w2[2] | 0x010000;
604 break;
605
606 case 43:
607 w2[2] = w2[2] | 0x01000000;
608 break;
609
610 case 44:
611 w2[3] = 0x01;
612 break;
613
614 case 45:
615 w2[3] = w2[3] | 0x0100;
616 break;
617
618 case 46:
619 w2[3] = w2[3] | 0x010000;
620 break;
621
622 case 47:
623 w2[3] = w2[3] | 0x01000000;
624 break;
625 }
626 }
627
628 static void append_0x01_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
629 {
630 switch (offset)
631 {
632 case 0:
633 w0[0] = 0x01;
634 break;
635
636 case 1:
637 w0[0] = w0[0] | 0x0100;
638 break;
639
640 case 2:
641 w0[0] = w0[0] | 0x010000;
642 break;
643
644 case 3:
645 w0[0] = w0[0] | 0x01000000;
646 break;
647
648 case 4:
649 w0[1] = 0x01;
650 break;
651
652 case 5:
653 w0[1] = w0[1] | 0x0100;
654 break;
655
656 case 6:
657 w0[1] = w0[1] | 0x010000;
658 break;
659
660 case 7:
661 w0[1] = w0[1] | 0x01000000;
662 break;
663
664 case 8:
665 w0[2] = 0x01;
666 break;
667
668 case 9:
669 w0[2] = w0[2] | 0x0100;
670 break;
671
672 case 10:
673 w0[2] = w0[2] | 0x010000;
674 break;
675
676 case 11:
677 w0[2] = w0[2] | 0x01000000;
678 break;
679
680 case 12:
681 w0[3] = 0x01;
682 break;
683
684 case 13:
685 w0[3] = w0[3] | 0x0100;
686 break;
687
688 case 14:
689 w0[3] = w0[3] | 0x010000;
690 break;
691
692 case 15:
693 w0[3] = w0[3] | 0x01000000;
694 break;
695
696 case 16:
697 w1[0] = 0x01;
698 break;
699
700 case 17:
701 w1[0] = w1[0] | 0x0100;
702 break;
703
704 case 18:
705 w1[0] = w1[0] | 0x010000;
706 break;
707
708 case 19:
709 w1[0] = w1[0] | 0x01000000;
710 break;
711
712 case 20:
713 w1[1] = 0x01;
714 break;
715
716 case 21:
717 w1[1] = w1[1] | 0x0100;
718 break;
719
720 case 22:
721 w1[1] = w1[1] | 0x010000;
722 break;
723
724 case 23:
725 w1[1] = w1[1] | 0x01000000;
726 break;
727
728 case 24:
729 w1[2] = 0x01;
730 break;
731
732 case 25:
733 w1[2] = w1[2] | 0x0100;
734 break;
735
736 case 26:
737 w1[2] = w1[2] | 0x010000;
738 break;
739
740 case 27:
741 w1[2] = w1[2] | 0x01000000;
742 break;
743
744 case 28:
745 w1[3] = 0x01;
746 break;
747
748 case 29:
749 w1[3] = w1[3] | 0x0100;
750 break;
751
752 case 30:
753 w1[3] = w1[3] | 0x010000;
754 break;
755
756 case 31:
757 w1[3] = w1[3] | 0x01000000;
758 break;
759
760 case 32:
761 w2[0] = 0x01;
762 break;
763
764 case 33:
765 w2[0] = w2[0] | 0x0100;
766 break;
767
768 case 34:
769 w2[0] = w2[0] | 0x010000;
770 break;
771
772 case 35:
773 w2[0] = w2[0] | 0x01000000;
774 break;
775
776 case 36:
777 w2[1] = 0x01;
778 break;
779
780 case 37:
781 w2[1] = w2[1] | 0x0100;
782 break;
783
784 case 38:
785 w2[1] = w2[1] | 0x010000;
786 break;
787
788 case 39:
789 w2[1] = w2[1] | 0x01000000;
790 break;
791
792 case 40:
793 w2[2] = 0x01;
794 break;
795
796 case 41:
797 w2[2] = w2[2] | 0x0100;
798 break;
799
800 case 42:
801 w2[2] = w2[2] | 0x010000;
802 break;
803
804 case 43:
805 w2[2] = w2[2] | 0x01000000;
806 break;
807
808 case 44:
809 w2[3] = 0x01;
810 break;
811
812 case 45:
813 w2[3] = w2[3] | 0x0100;
814 break;
815
816 case 46:
817 w2[3] = w2[3] | 0x010000;
818 break;
819
820 case 47:
821 w2[3] = w2[3] | 0x01000000;
822 break;
823
824 case 48:
825 w3[0] = 0x01;
826 break;
827
828 case 49:
829 w3[0] = w3[0] | 0x0100;
830 break;
831
832 case 50:
833 w3[0] = w3[0] | 0x010000;
834 break;
835
836 case 51:
837 w3[0] = w3[0] | 0x01000000;
838 break;
839
840 case 52:
841 w3[1] = 0x01;
842 break;
843
844 case 53:
845 w3[1] = w3[1] | 0x0100;
846 break;
847
848 case 54:
849 w3[1] = w3[1] | 0x010000;
850 break;
851
852 case 55:
853 w3[1] = w3[1] | 0x01000000;
854 break;
855
856 case 56:
857 w3[2] = 0x01;
858 break;
859
860 case 57:
861 w3[2] = w3[2] | 0x0100;
862 break;
863
864 case 58:
865 w3[2] = w3[2] | 0x010000;
866 break;
867
868 case 59:
869 w3[2] = w3[2] | 0x01000000;
870 break;
871
872 case 60:
873 w3[3] = 0x01;
874 break;
875
876 case 61:
877 w3[3] = w3[3] | 0x0100;
878 break;
879
880 case 62:
881 w3[3] = w3[3] | 0x010000;
882 break;
883
884 case 63:
885 w3[3] = w3[3] | 0x01000000;
886 break;
887 }
888 }
889
890 static void append_0x01_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
891 {
892 switch (offset)
893 {
894 case 0:
895 w0[0] = 0x01;
896 break;
897
898 case 1:
899 w0[0] = w0[0] | 0x0100;
900 break;
901
902 case 2:
903 w0[0] = w0[0] | 0x010000;
904 break;
905
906 case 3:
907 w0[0] = w0[0] | 0x01000000;
908 break;
909
910 case 4:
911 w0[1] = 0x01;
912 break;
913
914 case 5:
915 w0[1] = w0[1] | 0x0100;
916 break;
917
918 case 6:
919 w0[1] = w0[1] | 0x010000;
920 break;
921
922 case 7:
923 w0[1] = w0[1] | 0x01000000;
924 break;
925
926 case 8:
927 w0[2] = 0x01;
928 break;
929
930 case 9:
931 w0[2] = w0[2] | 0x0100;
932 break;
933
934 case 10:
935 w0[2] = w0[2] | 0x010000;
936 break;
937
938 case 11:
939 w0[2] = w0[2] | 0x01000000;
940 break;
941
942 case 12:
943 w0[3] = 0x01;
944 break;
945
946 case 13:
947 w0[3] = w0[3] | 0x0100;
948 break;
949
950 case 14:
951 w0[3] = w0[3] | 0x010000;
952 break;
953
954 case 15:
955 w0[3] = w0[3] | 0x01000000;
956 break;
957
958 case 16:
959 w1[0] = 0x01;
960 break;
961
962 case 17:
963 w1[0] = w1[0] | 0x0100;
964 break;
965
966 case 18:
967 w1[0] = w1[0] | 0x010000;
968 break;
969
970 case 19:
971 w1[0] = w1[0] | 0x01000000;
972 break;
973
974 case 20:
975 w1[1] = 0x01;
976 break;
977
978 case 21:
979 w1[1] = w1[1] | 0x0100;
980 break;
981
982 case 22:
983 w1[1] = w1[1] | 0x010000;
984 break;
985
986 case 23:
987 w1[1] = w1[1] | 0x01000000;
988 break;
989
990 case 24:
991 w1[2] = 0x01;
992 break;
993
994 case 25:
995 w1[2] = w1[2] | 0x0100;
996 break;
997
998 case 26:
999 w1[2] = w1[2] | 0x010000;
1000 break;
1001
1002 case 27:
1003 w1[2] = w1[2] | 0x01000000;
1004 break;
1005
1006 case 28:
1007 w1[3] = 0x01;
1008 break;
1009
1010 case 29:
1011 w1[3] = w1[3] | 0x0100;
1012 break;
1013
1014 case 30:
1015 w1[3] = w1[3] | 0x010000;
1016 break;
1017
1018 case 31:
1019 w1[3] = w1[3] | 0x01000000;
1020 break;
1021
1022 case 32:
1023 w2[0] = 0x01;
1024 break;
1025
1026 case 33:
1027 w2[0] = w2[0] | 0x0100;
1028 break;
1029
1030 case 34:
1031 w2[0] = w2[0] | 0x010000;
1032 break;
1033
1034 case 35:
1035 w2[0] = w2[0] | 0x01000000;
1036 break;
1037
1038 case 36:
1039 w2[1] = 0x01;
1040 break;
1041
1042 case 37:
1043 w2[1] = w2[1] | 0x0100;
1044 break;
1045
1046 case 38:
1047 w2[1] = w2[1] | 0x010000;
1048 break;
1049
1050 case 39:
1051 w2[1] = w2[1] | 0x01000000;
1052 break;
1053
1054 case 40:
1055 w2[2] = 0x01;
1056 break;
1057
1058 case 41:
1059 w2[2] = w2[2] | 0x0100;
1060 break;
1061
1062 case 42:
1063 w2[2] = w2[2] | 0x010000;
1064 break;
1065
1066 case 43:
1067 w2[2] = w2[2] | 0x01000000;
1068 break;
1069
1070 case 44:
1071 w2[3] = 0x01;
1072 break;
1073
1074 case 45:
1075 w2[3] = w2[3] | 0x0100;
1076 break;
1077
1078 case 46:
1079 w2[3] = w2[3] | 0x010000;
1080 break;
1081
1082 case 47:
1083 w2[3] = w2[3] | 0x01000000;
1084 break;
1085
1086 case 48:
1087 w3[0] = 0x01;
1088 break;
1089
1090 case 49:
1091 w3[0] = w3[0] | 0x0100;
1092 break;
1093
1094 case 50:
1095 w3[0] = w3[0] | 0x010000;
1096 break;
1097
1098 case 51:
1099 w3[0] = w3[0] | 0x01000000;
1100 break;
1101
1102 case 52:
1103 w3[1] = 0x01;
1104 break;
1105
1106 case 53:
1107 w3[1] = w3[1] | 0x0100;
1108 break;
1109
1110 case 54:
1111 w3[1] = w3[1] | 0x010000;
1112 break;
1113
1114 case 55:
1115 w3[1] = w3[1] | 0x01000000;
1116 break;
1117
1118 case 56:
1119 w3[2] = 0x01;
1120 break;
1121
1122 case 57:
1123 w3[2] = w3[2] | 0x0100;
1124 break;
1125
1126 case 58:
1127 w3[2] = w3[2] | 0x010000;
1128 break;
1129
1130 case 59:
1131 w3[2] = w3[2] | 0x01000000;
1132 break;
1133
1134 case 60:
1135 w3[3] = 0x01;
1136 break;
1137
1138 case 61:
1139 w3[3] = w3[3] | 0x0100;
1140 break;
1141
1142 case 62:
1143 w3[3] = w3[3] | 0x010000;
1144 break;
1145
1146 case 63:
1147 w3[3] = w3[3] | 0x01000000;
1148 break;
1149
1150 case 64:
1151 w4[0] = 0x01;
1152 break;
1153
1154 case 65:
1155 w4[0] = w4[0] | 0x0100;
1156 break;
1157
1158 case 66:
1159 w4[0] = w4[0] | 0x010000;
1160 break;
1161
1162 case 67:
1163 w4[0] = w4[0] | 0x01000000;
1164 break;
1165
1166 case 68:
1167 w4[1] = 0x01;
1168 break;
1169
1170 case 69:
1171 w4[1] = w4[1] | 0x0100;
1172 break;
1173
1174 case 70:
1175 w4[1] = w4[1] | 0x010000;
1176 break;
1177
1178 case 71:
1179 w4[1] = w4[1] | 0x01000000;
1180 break;
1181
1182 case 72:
1183 w4[2] = 0x01;
1184 break;
1185
1186 case 73:
1187 w4[2] = w4[2] | 0x0100;
1188 break;
1189
1190 case 74:
1191 w4[2] = w4[2] | 0x010000;
1192 break;
1193
1194 case 75:
1195 w4[2] = w4[2] | 0x01000000;
1196 break;
1197
1198 case 76:
1199 w4[3] = 0x01;
1200 break;
1201
1202 case 77:
1203 w4[3] = w4[3] | 0x0100;
1204 break;
1205
1206 case 78:
1207 w4[3] = w4[3] | 0x010000;
1208 break;
1209
1210 case 79:
1211 w4[3] = w4[3] | 0x01000000;
1212 break;
1213
1214 case 80:
1215 w5[0] = 0x01;
1216 break;
1217
1218 case 81:
1219 w5[0] = w5[0] | 0x0100;
1220 break;
1221
1222 case 82:
1223 w5[0] = w5[0] | 0x010000;
1224 break;
1225
1226 case 83:
1227 w5[0] = w5[0] | 0x01000000;
1228 break;
1229
1230 case 84:
1231 w5[1] = 0x01;
1232 break;
1233
1234 case 85:
1235 w5[1] = w5[1] | 0x0100;
1236 break;
1237
1238 case 86:
1239 w5[1] = w5[1] | 0x010000;
1240 break;
1241
1242 case 87:
1243 w5[1] = w5[1] | 0x01000000;
1244 break;
1245
1246 case 88:
1247 w5[2] = 0x01;
1248 break;
1249
1250 case 89:
1251 w5[2] = w5[2] | 0x0100;
1252 break;
1253
1254 case 90:
1255 w5[2] = w5[2] | 0x010000;
1256 break;
1257
1258 case 91:
1259 w5[2] = w5[2] | 0x01000000;
1260 break;
1261
1262 case 92:
1263 w5[3] = 0x01;
1264 break;
1265
1266 case 93:
1267 w5[3] = w5[3] | 0x0100;
1268 break;
1269
1270 case 94:
1271 w5[3] = w5[3] | 0x010000;
1272 break;
1273
1274 case 95:
1275 w5[3] = w5[3] | 0x01000000;
1276 break;
1277
1278 case 96:
1279 w6[0] = 0x01;
1280 break;
1281
1282 case 97:
1283 w6[0] = w6[0] | 0x0100;
1284 break;
1285
1286 case 98:
1287 w6[0] = w6[0] | 0x010000;
1288 break;
1289
1290 case 99:
1291 w6[0] = w6[0] | 0x01000000;
1292 break;
1293
1294 case 100:
1295 w6[1] = 0x01;
1296 break;
1297
1298 case 101:
1299 w6[1] = w6[1] | 0x0100;
1300 break;
1301
1302 case 102:
1303 w6[1] = w6[1] | 0x010000;
1304 break;
1305
1306 case 103:
1307 w6[1] = w6[1] | 0x01000000;
1308 break;
1309
1310 case 104:
1311 w6[2] = 0x01;
1312 break;
1313
1314 case 105:
1315 w6[2] = w6[2] | 0x0100;
1316 break;
1317
1318 case 106:
1319 w6[2] = w6[2] | 0x010000;
1320 break;
1321
1322 case 107:
1323 w6[2] = w6[2] | 0x01000000;
1324 break;
1325
1326 case 108:
1327 w6[3] = 0x01;
1328 break;
1329
1330 case 109:
1331 w6[3] = w6[3] | 0x0100;
1332 break;
1333
1334 case 110:
1335 w6[3] = w6[3] | 0x010000;
1336 break;
1337
1338 case 111:
1339 w6[3] = w6[3] | 0x01000000;
1340 break;
1341
1342 case 112:
1343 w7[0] = 0x01;
1344 break;
1345
1346 case 113:
1347 w7[0] = w7[0] | 0x0100;
1348 break;
1349
1350 case 114:
1351 w7[0] = w7[0] | 0x010000;
1352 break;
1353
1354 case 115:
1355 w7[0] = w7[0] | 0x01000000;
1356 break;
1357
1358 case 116:
1359 w7[1] = 0x01;
1360 break;
1361
1362 case 117:
1363 w7[1] = w7[1] | 0x0100;
1364 break;
1365
1366 case 118:
1367 w7[1] = w7[1] | 0x010000;
1368 break;
1369
1370 case 119:
1371 w7[1] = w7[1] | 0x01000000;
1372 break;
1373
1374 case 120:
1375 w7[2] = 0x01;
1376 break;
1377
1378 case 121:
1379 w7[2] = w7[2] | 0x0100;
1380 break;
1381
1382 case 122:
1383 w7[2] = w7[2] | 0x010000;
1384 break;
1385
1386 case 123:
1387 w7[2] = w7[2] | 0x01000000;
1388 break;
1389
1390 case 124:
1391 w7[3] = 0x01;
1392 break;
1393
1394 case 125:
1395 w7[3] = w7[3] | 0x0100;
1396 break;
1397
1398 case 126:
1399 w7[3] = w7[3] | 0x010000;
1400 break;
1401
1402 case 127:
1403 w7[3] = w7[3] | 0x01000000;
1404 break;
1405 }
1406 }
1407
1408 static void append_0x02_1x4 (u32 w0[4], const u32 offset)
1409 {
1410 switch (offset)
1411 {
1412 case 0:
1413 w0[0] = 0x02;
1414 break;
1415
1416 case 1:
1417 w0[0] = w0[0] | 0x0200;
1418 break;
1419
1420 case 2:
1421 w0[0] = w0[0] | 0x020000;
1422 break;
1423
1424 case 3:
1425 w0[0] = w0[0] | 0x02000000;
1426 break;
1427
1428 case 4:
1429 w0[1] = 0x02;
1430 break;
1431
1432 case 5:
1433 w0[1] = w0[1] | 0x0200;
1434 break;
1435
1436 case 6:
1437 w0[1] = w0[1] | 0x020000;
1438 break;
1439
1440 case 7:
1441 w0[1] = w0[1] | 0x02000000;
1442 break;
1443
1444 case 8:
1445 w0[2] = 0x02;
1446 break;
1447
1448 case 9:
1449 w0[2] = w0[2] | 0x0200;
1450 break;
1451
1452 case 10:
1453 w0[2] = w0[2] | 0x020000;
1454 break;
1455
1456 case 11:
1457 w0[2] = w0[2] | 0x02000000;
1458 break;
1459
1460 case 12:
1461 w0[3] = 0x02;
1462 break;
1463
1464 case 13:
1465 w0[3] = w0[3] | 0x0200;
1466 break;
1467
1468 case 14:
1469 w0[3] = w0[3] | 0x020000;
1470 break;
1471
1472 case 15:
1473 w0[3] = w0[3] | 0x02000000;
1474 break;
1475 }
1476 }
1477
1478 static void append_0x02_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
1479 {
1480 switch (offset)
1481 {
1482 case 0:
1483 w0[0] = 0x02;
1484 break;
1485
1486 case 1:
1487 w0[0] = w0[0] | 0x0200;
1488 break;
1489
1490 case 2:
1491 w0[0] = w0[0] | 0x020000;
1492 break;
1493
1494 case 3:
1495 w0[0] = w0[0] | 0x02000000;
1496 break;
1497
1498 case 4:
1499 w0[1] = 0x02;
1500 break;
1501
1502 case 5:
1503 w0[1] = w0[1] | 0x0200;
1504 break;
1505
1506 case 6:
1507 w0[1] = w0[1] | 0x020000;
1508 break;
1509
1510 case 7:
1511 w0[1] = w0[1] | 0x02000000;
1512 break;
1513
1514 case 8:
1515 w0[2] = 0x02;
1516 break;
1517
1518 case 9:
1519 w0[2] = w0[2] | 0x0200;
1520 break;
1521
1522 case 10:
1523 w0[2] = w0[2] | 0x020000;
1524 break;
1525
1526 case 11:
1527 w0[2] = w0[2] | 0x02000000;
1528 break;
1529
1530 case 12:
1531 w0[3] = 0x02;
1532 break;
1533
1534 case 13:
1535 w0[3] = w0[3] | 0x0200;
1536 break;
1537
1538 case 14:
1539 w0[3] = w0[3] | 0x020000;
1540 break;
1541
1542 case 15:
1543 w0[3] = w0[3] | 0x02000000;
1544 break;
1545
1546 case 16:
1547 w1[0] = 0x02;
1548 break;
1549
1550 case 17:
1551 w1[0] = w1[0] | 0x0200;
1552 break;
1553
1554 case 18:
1555 w1[0] = w1[0] | 0x020000;
1556 break;
1557
1558 case 19:
1559 w1[0] = w1[0] | 0x02000000;
1560 break;
1561
1562 case 20:
1563 w1[1] = 0x02;
1564 break;
1565
1566 case 21:
1567 w1[1] = w1[1] | 0x0200;
1568 break;
1569
1570 case 22:
1571 w1[1] = w1[1] | 0x020000;
1572 break;
1573
1574 case 23:
1575 w1[1] = w1[1] | 0x02000000;
1576 break;
1577
1578 case 24:
1579 w1[2] = 0x02;
1580 break;
1581
1582 case 25:
1583 w1[2] = w1[2] | 0x0200;
1584 break;
1585
1586 case 26:
1587 w1[2] = w1[2] | 0x020000;
1588 break;
1589
1590 case 27:
1591 w1[2] = w1[2] | 0x02000000;
1592 break;
1593
1594 case 28:
1595 w1[3] = 0x02;
1596 break;
1597
1598 case 29:
1599 w1[3] = w1[3] | 0x0200;
1600 break;
1601
1602 case 30:
1603 w1[3] = w1[3] | 0x020000;
1604 break;
1605
1606 case 31:
1607 w1[3] = w1[3] | 0x02000000;
1608 break;
1609 }
1610 }
1611
1612 static void append_0x02_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
1613 {
1614 switch (offset)
1615 {
1616 case 0:
1617 w0[0] = 0x02;
1618 break;
1619
1620 case 1:
1621 w0[0] = w0[0] | 0x0200;
1622 break;
1623
1624 case 2:
1625 w0[0] = w0[0] | 0x020000;
1626 break;
1627
1628 case 3:
1629 w0[0] = w0[0] | 0x02000000;
1630 break;
1631
1632 case 4:
1633 w0[1] = 0x02;
1634 break;
1635
1636 case 5:
1637 w0[1] = w0[1] | 0x0200;
1638 break;
1639
1640 case 6:
1641 w0[1] = w0[1] | 0x020000;
1642 break;
1643
1644 case 7:
1645 w0[1] = w0[1] | 0x02000000;
1646 break;
1647
1648 case 8:
1649 w0[2] = 0x02;
1650 break;
1651
1652 case 9:
1653 w0[2] = w0[2] | 0x0200;
1654 break;
1655
1656 case 10:
1657 w0[2] = w0[2] | 0x020000;
1658 break;
1659
1660 case 11:
1661 w0[2] = w0[2] | 0x02000000;
1662 break;
1663
1664 case 12:
1665 w0[3] = 0x02;
1666 break;
1667
1668 case 13:
1669 w0[3] = w0[3] | 0x0200;
1670 break;
1671
1672 case 14:
1673 w0[3] = w0[3] | 0x020000;
1674 break;
1675
1676 case 15:
1677 w0[3] = w0[3] | 0x02000000;
1678 break;
1679
1680 case 16:
1681 w1[0] = 0x02;
1682 break;
1683
1684 case 17:
1685 w1[0] = w1[0] | 0x0200;
1686 break;
1687
1688 case 18:
1689 w1[0] = w1[0] | 0x020000;
1690 break;
1691
1692 case 19:
1693 w1[0] = w1[0] | 0x02000000;
1694 break;
1695
1696 case 20:
1697 w1[1] = 0x02;
1698 break;
1699
1700 case 21:
1701 w1[1] = w1[1] | 0x0200;
1702 break;
1703
1704 case 22:
1705 w1[1] = w1[1] | 0x020000;
1706 break;
1707
1708 case 23:
1709 w1[1] = w1[1] | 0x02000000;
1710 break;
1711
1712 case 24:
1713 w1[2] = 0x02;
1714 break;
1715
1716 case 25:
1717 w1[2] = w1[2] | 0x0200;
1718 break;
1719
1720 case 26:
1721 w1[2] = w1[2] | 0x020000;
1722 break;
1723
1724 case 27:
1725 w1[2] = w1[2] | 0x02000000;
1726 break;
1727
1728 case 28:
1729 w1[3] = 0x02;
1730 break;
1731
1732 case 29:
1733 w1[3] = w1[3] | 0x0200;
1734 break;
1735
1736 case 30:
1737 w1[3] = w1[3] | 0x020000;
1738 break;
1739
1740 case 31:
1741 w1[3] = w1[3] | 0x02000000;
1742 break;
1743
1744 case 32:
1745 w2[0] = 0x02;
1746 break;
1747
1748 case 33:
1749 w2[0] = w2[0] | 0x0200;
1750 break;
1751
1752 case 34:
1753 w2[0] = w2[0] | 0x020000;
1754 break;
1755
1756 case 35:
1757 w2[0] = w2[0] | 0x02000000;
1758 break;
1759
1760 case 36:
1761 w2[1] = 0x02;
1762 break;
1763
1764 case 37:
1765 w2[1] = w2[1] | 0x0200;
1766 break;
1767
1768 case 38:
1769 w2[1] = w2[1] | 0x020000;
1770 break;
1771
1772 case 39:
1773 w2[1] = w2[1] | 0x02000000;
1774 break;
1775
1776 case 40:
1777 w2[2] = 0x02;
1778 break;
1779
1780 case 41:
1781 w2[2] = w2[2] | 0x0200;
1782 break;
1783
1784 case 42:
1785 w2[2] = w2[2] | 0x020000;
1786 break;
1787
1788 case 43:
1789 w2[2] = w2[2] | 0x02000000;
1790 break;
1791
1792 case 44:
1793 w2[3] = 0x02;
1794 break;
1795
1796 case 45:
1797 w2[3] = w2[3] | 0x0200;
1798 break;
1799
1800 case 46:
1801 w2[3] = w2[3] | 0x020000;
1802 break;
1803
1804 case 47:
1805 w2[3] = w2[3] | 0x02000000;
1806 break;
1807 }
1808 }
1809
/**
 * Place the marker byte 0x02 at byte position `offset` inside the 64-byte
 * region formed by w0..w3 (four u32 words each; byte 0 of a word is its
 * least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 64 and above change nothing.
 */
static void append_0x02_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x02 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;

    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;
  }
}
2071
/**
 * Place the marker byte 0x02 at byte position `offset` inside the 128-byte
 * region formed by w0..w7 (four u32 words each; byte 0 of a word is its
 * least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 128 and above change nothing.
 */
static void append_0x02_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x02 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;

    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;

    case 16: w4[0] = lane ? (w4[0] | mark) : mark; break;
    case 17: w4[1] = lane ? (w4[1] | mark) : mark; break;
    case 18: w4[2] = lane ? (w4[2] | mark) : mark; break;
    case 19: w4[3] = lane ? (w4[3] | mark) : mark; break;

    case 20: w5[0] = lane ? (w5[0] | mark) : mark; break;
    case 21: w5[1] = lane ? (w5[1] | mark) : mark; break;
    case 22: w5[2] = lane ? (w5[2] | mark) : mark; break;
    case 23: w5[3] = lane ? (w5[3] | mark) : mark; break;

    case 24: w6[0] = lane ? (w6[0] | mark) : mark; break;
    case 25: w6[1] = lane ? (w6[1] | mark) : mark; break;
    case 26: w6[2] = lane ? (w6[2] | mark) : mark; break;
    case 27: w6[3] = lane ? (w6[3] | mark) : mark; break;

    case 28: w7[0] = lane ? (w7[0] | mark) : mark; break;
    case 29: w7[1] = lane ? (w7[1] | mark) : mark; break;
    case 30: w7[2] = lane ? (w7[2] | mark) : mark; break;
    case 31: w7[3] = lane ? (w7[3] | mark) : mark; break;
  }
}
2589
/**
 * Place the marker byte 0x80 at byte position `offset` inside the 16-byte
 * region w0[0..3] (byte 0 of a word is its least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 16 and above change nothing.
 */
static void append_0x80_1x4 (u32 w0[4], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x80 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;
  }
}
2659
/**
 * Place the marker byte 0x80 at byte position `offset` inside the 32-byte
 * region formed by w0 and w1 (four u32 words each; byte 0 of a word is its
 * least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 32 and above change nothing.
 */
static void append_0x80_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x80 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case 4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case 5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case 6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case 7: w1[3] = lane ? (w1[3] | mark) : mark; break;
  }
}
2793
/**
 * Place the marker byte 0x80 at byte position `offset` inside the 48-byte
 * region formed by w0..w2 (four u32 words each; byte 0 of a word is its
 * least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 48 and above change nothing.
 */
static void append_0x80_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x80 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;
  }
}
2991
/**
 * Place the marker byte 0x80 at byte position `offset` inside the 64-byte
 * region formed by w0..w3 (four u32 words each; byte 0 of a word is its
 * least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 64 and above change nothing.
 */
static void append_0x80_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x80 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;

    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;
  }
}
3253
/**
 * Place the marker byte 0x80 at byte position `offset` inside the 128-byte
 * region formed by w0..w7 (four u32 words each; byte 0 of a word is its
 * least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 128 and above change nothing.
 */
static void append_0x80_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x80 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;

    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;

    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;

    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;

    case 16: w4[0] = lane ? (w4[0] | mark) : mark; break;
    case 17: w4[1] = lane ? (w4[1] | mark) : mark; break;
    case 18: w4[2] = lane ? (w4[2] | mark) : mark; break;
    case 19: w4[3] = lane ? (w4[3] | mark) : mark; break;

    case 20: w5[0] = lane ? (w5[0] | mark) : mark; break;
    case 21: w5[1] = lane ? (w5[1] | mark) : mark; break;
    case 22: w5[2] = lane ? (w5[2] | mark) : mark; break;
    case 23: w5[3] = lane ? (w5[3] | mark) : mark; break;

    case 24: w6[0] = lane ? (w6[0] | mark) : mark; break;
    case 25: w6[1] = lane ? (w6[1] | mark) : mark; break;
    case 26: w6[2] = lane ? (w6[2] | mark) : mark; break;
    case 27: w6[3] = lane ? (w6[3] | mark) : mark; break;

    case 28: w7[0] = lane ? (w7[0] | mark) : mark; break;
    case 29: w7[1] = lane ? (w7[1] | mark) : mark; break;
    case 30: w7[2] = lane ? (w7[2] | mark) : mark; break;
    case 31: w7[3] = lane ? (w7[3] | mark) : mark; break;
  }
}
3771
/**
 * Place the marker byte 0x80 at byte position `offset` inside the 64-byte
 * region w[0..15] (byte 0 of a word is its least significant byte).
 *
 * When `offset` is a multiple of 4 the target word is overwritten with the
 * marker (it is not read). Otherwise the marker is OR-ed into the word and
 * its other bits are preserved. Offsets of 64 and above change nothing.
 */
static void append_0x80_1x16 (u32 w[16], const u32 offset)
{
  const u32 lane = offset & 3;                // byte position inside the target word
  const u32 mark = (u32) 0x80 << (lane * 8);  // marker shifted onto that byte

  // One case per word so that every array access uses a constant index.
  switch (offset >> 2)
  {
    case  0: w[ 0] = lane ? (w[ 0] | mark) : mark; break;
    case  1: w[ 1] = lane ? (w[ 1] | mark) : mark; break;
    case  2: w[ 2] = lane ? (w[ 2] | mark) : mark; break;
    case  3: w[ 3] = lane ? (w[ 3] | mark) : mark; break;
    case  4: w[ 4] = lane ? (w[ 4] | mark) : mark; break;
    case  5: w[ 5] = lane ? (w[ 5] | mark) : mark; break;
    case  6: w[ 6] = lane ? (w[ 6] | mark) : mark; break;
    case  7: w[ 7] = lane ? (w[ 7] | mark) : mark; break;
    case  8: w[ 8] = lane ? (w[ 8] | mark) : mark; break;
    case  9: w[ 9] = lane ? (w[ 9] | mark) : mark; break;
    case 10: w[10] = lane ? (w[10] | mark) : mark; break;
    case 11: w[11] = lane ? (w[11] | mark) : mark; break;
    case 12: w[12] = lane ? (w[12] | mark) : mark; break;
    case 13: w[13] = lane ? (w[13] | mark) : mark; break;
    case 14: w[14] = lane ? (w[14] | mark) : mark; break;
    case 15: w[15] = lane ? (w[15] | mark) : mark; break;
  }
}
4033
/**
 * Shift the 16-word buffer held in w0..w3 toward higher word indexes, in
 * place. In the notes below the words are counted w0[0] (lowest) through
 * w3[3] (highest).
 *
 * offset / 4 selects the whole-word part of the shift (one switch case per
 * value); the remaining byte part is handed to amd_bytealign_S or
 * __byte_perm_S, which merge two neighbouring source words into each
 * destination word. Both helpers are defined outside this function, so the
 * exact byte-level result depends on them (presumably a shift by `offset`
 * bytes -- confirm against their definitions). Words below the whole-word
 * distance are set to 0.
 *
 * Within every case the assignments run from the highest word downwards, so
 * each statement still reads the original values of the lower words it uses.
 *
 * Only offset / 4 in 0..13 has a case and there is no default, so larger
 * offsets leave the buffer unchanged. The AMD/GENERIC path writes words up to
 * w3[2], the NV path up to w3[1]; w3[3] is never written by either path.
 *
 * w0, w1, w2, w3: the four 4-word quarters of the buffer, modified in place.
 * offset:         shift distance; split into offset / 4 and a remainder.
 */
4034 static void switch_buffer_by_offset_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
4035 {
4036 #if defined IS_AMD || defined IS_GENERIC
4037 const int offset_mod_4 = offset & 3;
4038
/* NOTE(review): this is 4 - offset, not 4 - offset_mod_4, so the value is
   zero or negative for offset >= 4. Presumably amd_bytealign_S only looks at
   the low two bits of its third argument -- confirm. */
4039 const int offset_minus_4 = 4 - offset;
4040
/* One case per whole-word shift distance. */
4041 switch (offset / 4)
4042 {
4043 case 0:
4044 w3[2] = amd_bytealign_S ( 0, w3[1], offset_minus_4);
4045 w3[1] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
4046 w3[0] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
4047 w2[3] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
4048 w2[2] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
4049 w2[1] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
4050 w2[0] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
4051 w1[3] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
4052 w1[2] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4053 w1[1] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4054 w1[0] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4055 w0[3] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4056 w0[2] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4057 w0[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4058 w0[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4059
/* Byte part is zero: move every word from this case's lowest non-zeroed
   index upwards down by one position and clear w3[2]. Presumably needed
   because amd_bytealign_S with a zero byte shift returns its second operand,
   which leaves the statements above one word too far -- confirm. The same
   correction follows every case below. */
4060 if (offset_mod_4 == 0)
4061 {
4062 w0[0] = w0[1];
4063 w0[1] = w0[2];
4064 w0[2] = w0[3];
4065 w0[3] = w1[0];
4066 w1[0] = w1[1];
4067 w1[1] = w1[2];
4068 w1[2] = w1[3];
4069 w1[3] = w2[0];
4070 w2[0] = w2[1];
4071 w2[1] = w2[2];
4072 w2[2] = w2[3];
4073 w2[3] = w3[0];
4074 w3[0] = w3[1];
4075 w3[1] = w3[2];
4076 w3[2] = 0;
4077 }
4078
4079 break;
4080
4081 case 1:
4082 w3[2] = amd_bytealign_S ( 0, w3[0], offset_minus_4);
4083 w3[1] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
4084 w3[0] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
4085 w2[3] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
4086 w2[2] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
4087 w2[1] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
4088 w2[0] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
4089 w1[3] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4090 w1[2] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4091 w1[1] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4092 w1[0] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4093 w0[3] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4094 w0[2] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4095 w0[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4096 w0[0] = 0;
4097
4098 if (offset_mod_4 == 0)
4099 {
4100 w0[1] = w0[2];
4101 w0[2] = w0[3];
4102 w0[3] = w1[0];
4103 w1[0] = w1[1];
4104 w1[1] = w1[2];
4105 w1[2] = w1[3];
4106 w1[3] = w2[0];
4107 w2[0] = w2[1];
4108 w2[1] = w2[2];
4109 w2[2] = w2[3];
4110 w2[3] = w3[0];
4111 w3[0] = w3[1];
4112 w3[1] = w3[2];
4113 w3[2] = 0;
4114 }
4115
4116 break;
4117
4118 case 2:
4119 w3[2] = amd_bytealign_S ( 0, w2[3], offset_minus_4);
4120 w3[1] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
4121 w3[0] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
4122 w2[3] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
4123 w2[2] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
4124 w2[1] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
4125 w2[0] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4126 w1[3] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4127 w1[2] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4128 w1[1] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4129 w1[0] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4130 w0[3] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4131 w0[2] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4132 w0[1] = 0;
4133 w0[0] = 0;
4134
4135 if (offset_mod_4 == 0)
4136 {
4137 w0[2] = w0[3];
4138 w0[3] = w1[0];
4139 w1[0] = w1[1];
4140 w1[1] = w1[2];
4141 w1[2] = w1[3];
4142 w1[3] = w2[0];
4143 w2[0] = w2[1];
4144 w2[1] = w2[2];
4145 w2[2] = w2[3];
4146 w2[3] = w3[0];
4147 w3[0] = w3[1];
4148 w3[1] = w3[2];
4149 w3[2] = 0;
4150 }
4151
4152 break;
4153
4154 case 3:
4155 w3[2] = amd_bytealign_S ( 0, w2[2], offset_minus_4);
4156 w3[1] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
4157 w3[0] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
4158 w2[3] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
4159 w2[2] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
4160 w2[1] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4161 w2[0] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4162 w1[3] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4163 w1[2] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4164 w1[1] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4165 w1[0] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4166 w0[3] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4167 w0[2] = 0;
4168 w0[1] = 0;
4169 w0[0] = 0;
4170
4171 if (offset_mod_4 == 0)
4172 {
4173 w0[3] = w1[0];
4174 w1[0] = w1[1];
4175 w1[1] = w1[2];
4176 w1[2] = w1[3];
4177 w1[3] = w2[0];
4178 w2[0] = w2[1];
4179 w2[1] = w2[2];
4180 w2[2] = w2[3];
4181 w2[3] = w3[0];
4182 w3[0] = w3[1];
4183 w3[1] = w3[2];
4184 w3[2] = 0;
4185 }
4186
4187 break;
4188
4189 case 4:
4190 w3[2] = amd_bytealign_S ( 0, w2[1], offset_minus_4);
4191 w3[1] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
4192 w3[0] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
4193 w2[3] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
4194 w2[2] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4195 w2[1] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4196 w2[0] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4197 w1[3] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4198 w1[2] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4199 w1[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4200 w1[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4201 w0[3] = 0;
4202 w0[2] = 0;
4203 w0[1] = 0;
4204 w0[0] = 0;
4205
4206 if (offset_mod_4 == 0)
4207 {
4208 w1[0] = w1[1];
4209 w1[1] = w1[2];
4210 w1[2] = w1[3];
4211 w1[3] = w2[0];
4212 w2[0] = w2[1];
4213 w2[1] = w2[2];
4214 w2[2] = w2[3];
4215 w2[3] = w3[0];
4216 w3[0] = w3[1];
4217 w3[1] = w3[2];
4218 w3[2] = 0;
4219 }
4220
4221 break;
4222
4223 case 5:
4224 w3[2] = amd_bytealign_S ( 0, w2[0], offset_minus_4);
4225 w3[1] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
4226 w3[0] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
4227 w2[3] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4228 w2[2] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4229 w2[1] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4230 w2[0] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4231 w1[3] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4232 w1[2] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4233 w1[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4234 w1[0] = 0;
4235 w0[3] = 0;
4236 w0[2] = 0;
4237 w0[1] = 0;
4238 w0[0] = 0;
4239
4240 if (offset_mod_4 == 0)
4241 {
4242 w1[1] = w1[2];
4243 w1[2] = w1[3];
4244 w1[3] = w2[0];
4245 w2[0] = w2[1];
4246 w2[1] = w2[2];
4247 w2[2] = w2[3];
4248 w2[3] = w3[0];
4249 w3[0] = w3[1];
4250 w3[1] = w3[2];
4251 w3[2] = 0;
4252 }
4253
4254 break;
4255
4256 case 6:
4257 w3[2] = amd_bytealign_S ( 0, w1[3], offset_minus_4);
4258 w3[1] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
4259 w3[0] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4260 w2[3] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4261 w2[2] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4262 w2[1] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4263 w2[0] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4264 w1[3] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4265 w1[2] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4266 w1[1] = 0;
4267 w1[0] = 0;
4268 w0[3] = 0;
4269 w0[2] = 0;
4270 w0[1] = 0;
4271 w0[0] = 0;
4272
4273 if (offset_mod_4 == 0)
4274 {
4275 w1[2] = w1[3];
4276 w1[3] = w2[0];
4277 w2[0] = w2[1];
4278 w2[1] = w2[2];
4279 w2[2] = w2[3];
4280 w2[3] = w3[0];
4281 w3[0] = w3[1];
4282 w3[1] = w3[2];
4283 w3[2] = 0;
4284 }
4285
4286 break;
4287
4288 case 7:
4289 w3[2] = amd_bytealign_S ( 0, w1[2], offset_minus_4);
4290 w3[1] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
4291 w3[0] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4292 w2[3] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4293 w2[2] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4294 w2[1] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4295 w2[0] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4296 w1[3] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4297 w1[2] = 0;
4298 w1[1] = 0;
4299 w1[0] = 0;
4300 w0[3] = 0;
4301 w0[2] = 0;
4302 w0[1] = 0;
4303 w0[0] = 0;
4304
4305 if (offset_mod_4 == 0)
4306 {
4307 w1[3] = w2[0];
4308 w2[0] = w2[1];
4309 w2[1] = w2[2];
4310 w2[2] = w2[3];
4311 w2[3] = w3[0];
4312 w3[0] = w3[1];
4313 w3[1] = w3[2];
4314 w3[2] = 0;
4315 }
4316
4317 break;
4318
4319 case 8:
4320 w3[2] = amd_bytealign_S ( 0, w1[1], offset_minus_4);
4321 w3[1] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
4322 w3[0] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4323 w2[3] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4324 w2[2] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4325 w2[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4326 w2[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4327 w1[3] = 0;
4328 w1[2] = 0;
4329 w1[1] = 0;
4330 w1[0] = 0;
4331 w0[3] = 0;
4332 w0[2] = 0;
4333 w0[1] = 0;
4334 w0[0] = 0;
4335
4336 if (offset_mod_4 == 0)
4337 {
4338 w2[0] = w2[1];
4339 w2[1] = w2[2];
4340 w2[2] = w2[3];
4341 w2[3] = w3[0];
4342 w3[0] = w3[1];
4343 w3[1] = w3[2];
4344 w3[2] = 0;
4345 }
4346
4347 break;
4348
4349 case 9:
4350 w3[2] = amd_bytealign_S ( 0, w1[0], offset_minus_4);
4351 w3[1] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
4352 w3[0] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4353 w2[3] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4354 w2[2] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4355 w2[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4356 w2[0] = 0;
4357 w1[3] = 0;
4358 w1[2] = 0;
4359 w1[1] = 0;
4360 w1[0] = 0;
4361 w0[3] = 0;
4362 w0[2] = 0;
4363 w0[1] = 0;
4364 w0[0] = 0;
4365
4366 if (offset_mod_4 == 0)
4367 {
4368 w2[1] = w2[2];
4369 w2[2] = w2[3];
4370 w2[3] = w3[0];
4371 w3[0] = w3[1];
4372 w3[1] = w3[2];
4373 w3[2] = 0;
4374 }
4375
4376 break;
4377
4378 case 10:
4379 w3[2] = amd_bytealign_S ( 0, w0[3], offset_minus_4);
4380 w3[1] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
4381 w3[0] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4382 w2[3] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4383 w2[2] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4384 w2[1] = 0;
4385 w2[0] = 0;
4386 w1[3] = 0;
4387 w1[2] = 0;
4388 w1[1] = 0;
4389 w1[0] = 0;
4390 w0[3] = 0;
4391 w0[2] = 0;
4392 w0[1] = 0;
4393 w0[0] = 0;
4394
4395 if (offset_mod_4 == 0)
4396 {
4397 w2[2] = w2[3];
4398 w2[3] = w3[0];
4399 w3[0] = w3[1];
4400 w3[1] = w3[2];
4401 w3[2] = 0;
4402 }
4403
4404 break;
4405
4406 case 11:
4407 w3[2] = amd_bytealign_S ( 0, w0[2], offset_minus_4);
4408 w3[1] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
4409 w3[0] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4410 w2[3] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4411 w2[2] = 0;
4412 w2[1] = 0;
4413 w2[0] = 0;
4414 w1[3] = 0;
4415 w1[2] = 0;
4416 w1[1] = 0;
4417 w1[0] = 0;
4418 w0[3] = 0;
4419 w0[2] = 0;
4420 w0[1] = 0;
4421 w0[0] = 0;
4422
4423 if (offset_mod_4 == 0)
4424 {
4425 w2[3] = w3[0];
4426 w3[0] = w3[1];
4427 w3[1] = w3[2];
4428 w3[2] = 0;
4429 }
4430
4431 break;
4432
4433 case 12:
4434 w3[2] = amd_bytealign_S ( 0, w0[1], offset_minus_4);
4435 w3[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
4436 w3[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4437 w2[3] = 0;
4438 w2[2] = 0;
4439 w2[1] = 0;
4440 w2[0] = 0;
4441 w1[3] = 0;
4442 w1[2] = 0;
4443 w1[1] = 0;
4444 w1[0] = 0;
4445 w0[3] = 0;
4446 w0[2] = 0;
4447 w0[1] = 0;
4448 w0[0] = 0;
4449
4450 if (offset_mod_4 == 0)
4451 {
4452 w3[0] = w3[1];
4453 w3[1] = w3[2];
4454 w3[2] = 0;
4455 }
4456
4457 break;
4458
4459 case 13:
4460 w3[2] = amd_bytealign_S ( 0, w0[0], offset_minus_4);
4461 w3[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
4462 w3[0] = 0;
4463 w2[3] = 0;
4464 w2[2] = 0;
4465 w2[1] = 0;
4466 w2[0] = 0;
4467 w1[3] = 0;
4468 w1[2] = 0;
4469 w1[1] = 0;
4470 w1[0] = 0;
4471 w0[3] = 0;
4472 w0[2] = 0;
4473 w0[1] = 0;
4474 w0[0] = 0;
4475
4476 if (offset_mod_4 == 0)
4477 {
4478 w3[1] = w3[2];
4479 w3[2] = 0;
4480 }
4481
4482 break;
4483 }
4484 #endif
4485
4486 #ifdef IS_NV
/* Here the value is 4 - (offset % 4), i.e. always in 1..4. */
4487 const int offset_minus_4 = 4 - (offset % 4);
4488
/* 0x76543210 shifted right by offset_minus_4 nibbles, low 16 bits kept:
   0x7654, 0x6543, 0x5432, 0x4321 for offset % 4 == 0, 1, 2, 3. The value is
   passed to __byte_perm_S as its third argument; presumably each nibble picks
   one byte out of the two word operands -- confirm against the helper. */
4489 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
4490
4491 switch (offset / 4)
4492 {
4493 case 0:
4494 w3[1] = __byte_perm_S (w3[0], w3[1], selector);
4495 w3[0] = __byte_perm_S (w2[3], w3[0], selector);
4496 w2[3] = __byte_perm_S (w2[2], w2[3], selector);
4497 w2[2] = __byte_perm_S (w2[1], w2[2], selector);
4498 w2[1] = __byte_perm_S (w2[0], w2[1], selector);
4499 w2[0] = __byte_perm_S (w1[3], w2[0], selector);
4500 w1[3] = __byte_perm_S (w1[2], w1[3], selector);
4501 w1[2] = __byte_perm_S (w1[1], w1[2], selector);
4502 w1[1] = __byte_perm_S (w1[0], w1[1], selector);
4503 w1[0] = __byte_perm_S (w0[3], w1[0], selector);
4504 w0[3] = __byte_perm_S (w0[2], w0[3], selector);
4505 w0[2] = __byte_perm_S (w0[1], w0[2], selector);
4506 w0[1] = __byte_perm_S (w0[0], w0[1], selector);
4507 w0[0] = __byte_perm_S ( 0, w0[0], selector);
4508
4509 break;
4510
4511 case 1:
4512 w3[1] = __byte_perm_S (w2[3], w3[0], selector);
4513 w3[0] = __byte_perm_S (w2[2], w2[3], selector);
4514 w2[3] = __byte_perm_S (w2[1], w2[2], selector);
4515 w2[2] = __byte_perm_S (w2[0], w2[1], selector);
4516 w2[1] = __byte_perm_S (w1[3], w2[0], selector);
4517 w2[0] = __byte_perm_S (w1[2], w1[3], selector);
4518 w1[3] = __byte_perm_S (w1[1], w1[2], selector);
4519 w1[2] = __byte_perm_S (w1[0], w1[1], selector);
4520 w1[1] = __byte_perm_S (w0[3], w1[0], selector);
4521 w1[0] = __byte_perm_S (w0[2], w0[3], selector);
4522 w0[3] = __byte_perm_S (w0[1], w0[2], selector);
4523 w0[2] = __byte_perm_S (w0[0], w0[1], selector);
4524 w0[1] = __byte_perm_S ( 0, w0[0], selector);
4525 w0[0] = 0;
4526
4527 break;
4528
4529 case 2:
4530 w3[1] = __byte_perm_S (w2[2], w2[3], selector);
4531 w3[0] = __byte_perm_S (w2[1], w2[2], selector);
4532 w2[3] = __byte_perm_S (w2[0], w2[1], selector);
4533 w2[2] = __byte_perm_S (w1[3], w2[0], selector);
4534 w2[1] = __byte_perm_S (w1[2], w1[3], selector);
4535 w2[0] = __byte_perm_S (w1[1], w1[2], selector);
4536 w1[3] = __byte_perm_S (w1[0], w1[1], selector);
4537 w1[2] = __byte_perm_S (w0[3], w1[0], selector);
4538 w1[1] = __byte_perm_S (w0[2], w0[3], selector);
4539 w1[0] = __byte_perm_S (w0[1], w0[2], selector);
4540 w0[3] = __byte_perm_S (w0[0], w0[1], selector);
4541 w0[2] = __byte_perm_S ( 0, w0[0], selector);
4542 w0[1] = 0;
4543 w0[0] = 0;
4544
4545 break;
4546
4547 case 3:
4548 w3[1] = __byte_perm_S (w2[1], w2[2], selector);
4549 w3[0] = __byte_perm_S (w2[0], w2[1], selector);
4550 w2[3] = __byte_perm_S (w1[3], w2[0], selector);
4551 w2[2] = __byte_perm_S (w1[2], w1[3], selector);
4552 w2[1] = __byte_perm_S (w1[1], w1[2], selector);
4553 w2[0] = __byte_perm_S (w1[0], w1[1], selector);
4554 w1[3] = __byte_perm_S (w0[3], w1[0], selector);
4555 w1[2] = __byte_perm_S (w0[2], w0[3], selector);
4556 w1[1] = __byte_perm_S (w0[1], w0[2], selector);
4557 w1[0] = __byte_perm_S (w0[0], w0[1], selector);
4558 w0[3] = __byte_perm_S ( 0, w0[0], selector);
4559 w0[2] = 0;
4560 w0[1] = 0;
4561 w0[0] = 0;
4562
4563 break;
4564
4565 case 4:
4566 w3[1] = __byte_perm_S (w2[0], w2[1], selector);
4567 w3[0] = __byte_perm_S (w1[3], w2[0], selector);
4568 w2[3] = __byte_perm_S (w1[2], w1[3], selector);
4569 w2[2] = __byte_perm_S (w1[1], w1[2], selector);
4570 w2[1] = __byte_perm_S (w1[0], w1[1], selector);
4571 w2[0] = __byte_perm_S (w0[3], w1[0], selector);
4572 w1[3] = __byte_perm_S (w0[2], w0[3], selector);
4573 w1[2] = __byte_perm_S (w0[1], w0[2], selector);
4574 w1[1] = __byte_perm_S (w0[0], w0[1], selector);
4575 w1[0] = __byte_perm_S ( 0, w0[0], selector);
4576 w0[3] = 0;
4577 w0[2] = 0;
4578 w0[1] = 0;
4579 w0[0] = 0;
4580
4581 break;
4582
4583 case 5:
4584 w3[1] = __byte_perm_S (w1[3], w2[0], selector);
4585 w3[0] = __byte_perm_S (w1[2], w1[3], selector);
4586 w2[3] = __byte_perm_S (w1[1], w1[2], selector);
4587 w2[2] = __byte_perm_S (w1[0], w1[1], selector);
4588 w2[1] = __byte_perm_S (w0[3], w1[0], selector);
4589 w2[0] = __byte_perm_S (w0[2], w0[3], selector);
4590 w1[3] = __byte_perm_S (w0[1], w0[2], selector);
4591 w1[2] = __byte_perm_S (w0[0], w0[1], selector);
4592 w1[1] = __byte_perm_S ( 0, w0[0], selector);
4593 w1[0] = 0;
4594 w0[3] = 0;
4595 w0[2] = 0;
4596 w0[1] = 0;
4597 w0[0] = 0;
4598
4599 break;
4600
4601 case 6:
4602 w3[1] = __byte_perm_S (w1[2], w1[3], selector);
4603 w3[0] = __byte_perm_S (w1[1], w1[2], selector);
4604 w2[3] = __byte_perm_S (w1[0], w1[1], selector);
4605 w2[2] = __byte_perm_S (w0[3], w1[0], selector);
4606 w2[1] = __byte_perm_S (w0[2], w0[3], selector);
4607 w2[0] = __byte_perm_S (w0[1], w0[2], selector);
4608 w1[3] = __byte_perm_S (w0[0], w0[1], selector);
4609 w1[2] = __byte_perm_S ( 0, w0[0], selector);
4610 w1[1] = 0;
4611 w1[0] = 0;
4612 w0[3] = 0;
4613 w0[2] = 0;
4614 w0[1] = 0;
4615 w0[0] = 0;
4616
4617 break;
4618
4619 case 7:
4620 w3[1] = __byte_perm_S (w1[1], w1[2], selector);
4621 w3[0] = __byte_perm_S (w1[0], w1[1], selector);
4622 w2[3] = __byte_perm_S (w0[3], w1[0], selector);
4623 w2[2] = __byte_perm_S (w0[2], w0[3], selector);
4624 w2[1] = __byte_perm_S (w0[1], w0[2], selector);
4625 w2[0] = __byte_perm_S (w0[0], w0[1], selector);
4626 w1[3] = __byte_perm_S ( 0, w0[0], selector);
4627 w1[2] = 0;
4628 w1[1] = 0;
4629 w1[0] = 0;
4630 w0[3] = 0;
4631 w0[2] = 0;
4632 w0[1] = 0;
4633 w0[0] = 0;
4634
4635 break;
4636
4637 case 8:
4638 w3[1] = __byte_perm_S (w1[0], w1[1], selector);
4639 w3[0] = __byte_perm_S (w0[3], w1[0], selector);
4640 w2[3] = __byte_perm_S (w0[2], w0[3], selector);
4641 w2[2] = __byte_perm_S (w0[1], w0[2], selector);
4642 w2[1] = __byte_perm_S (w0[0], w0[1], selector);
4643 w2[0] = __byte_perm_S ( 0, w0[0], selector);
4644 w1[3] = 0;
4645 w1[2] = 0;
4646 w1[1] = 0;
4647 w1[0] = 0;
4648 w0[3] = 0;
4649 w0[2] = 0;
4650 w0[1] = 0;
4651 w0[0] = 0;
4652
4653 break;
4654
4655 case 9:
4656 w3[1] = __byte_perm_S (w0[3], w1[0], selector);
4657 w3[0] = __byte_perm_S (w0[2], w0[3], selector);
4658 w2[3] = __byte_perm_S (w0[1], w0[2], selector);
4659 w2[2] = __byte_perm_S (w0[0], w0[1], selector);
4660 w2[1] = __byte_perm_S ( 0, w0[0], selector);
4661 w2[0] = 0;
4662 w1[3] = 0;
4663 w1[2] = 0;
4664 w1[1] = 0;
4665 w1[0] = 0;
4666 w0[3] = 0;
4667 w0[2] = 0;
4668 w0[1] = 0;
4669 w0[0] = 0;
4670
4671 break;
4672
4673 case 10:
4674 w3[1] = __byte_perm_S (w0[2], w0[3], selector);
4675 w3[0] = __byte_perm_S (w0[1], w0[2], selector);
4676 w2[3] = __byte_perm_S (w0[0], w0[1], selector);
4677 w2[2] = __byte_perm_S ( 0, w0[0], selector);
4678 w2[1] = 0;
4679 w2[0] = 0;
4680 w1[3] = 0;
4681 w1[2] = 0;
4682 w1[1] = 0;
4683 w1[0] = 0;
4684 w0[3] = 0;
4685 w0[2] = 0;
4686 w0[1] = 0;
4687 w0[0] = 0;
4688
4689 break;
4690
4691 case 11:
4692 w3[1] = __byte_perm_S (w0[1], w0[2], selector);
4693 w3[0] = __byte_perm_S (w0[0], w0[1], selector);
4694 w2[3] = __byte_perm_S ( 0, w0[0], selector);
4695 w2[2] = 0;
4696 w2[1] = 0;
4697 w2[0] = 0;
4698 w1[3] = 0;
4699 w1[2] = 0;
4700 w1[1] = 0;
4701 w1[0] = 0;
4702 w0[3] = 0;
4703 w0[2] = 0;
4704 w0[1] = 0;
4705 w0[0] = 0;
4706
4707 break;
4708
4709 case 12:
4710 w3[1] = __byte_perm_S (w0[0], w0[1], selector);
4711 w3[0] = __byte_perm_S ( 0, w0[0], selector);
4712 w2[3] = 0;
4713 w2[2] = 0;
4714 w2[1] = 0;
4715 w2[0] = 0;
4716 w1[3] = 0;
4717 w1[2] = 0;
4718 w1[1] = 0;
4719 w1[0] = 0;
4720 w0[3] = 0;
4721 w0[2] = 0;
4722 w0[1] = 0;
4723 w0[0] = 0;
4724
4725 break;
4726
4727 case 13:
4728 w3[1] = __byte_perm_S ( 0, w0[0], selector);
4729 w3[0] = 0;
4730 w2[3] = 0;
4731 w2[2] = 0;
4732 w2[1] = 0;
4733 w2[0] = 0;
4734 w1[3] = 0;
4735 w1[2] = 0;
4736 w1[1] = 0;
4737 w1[0] = 0;
4738 w0[3] = 0;
4739 w0[2] = 0;
4740 w0[1] = 0;
4741 w0[0] = 0;
4742
4743 break;
4744 }
4745 #endif
4746 }
4747
4748 static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
4749 {
4750 #if defined IS_AMD || defined IS_GENERIC
4751 switch (offset / 4)
4752 {
4753 case 0:
4754 w3[2] = amd_bytealign_S (w3[1], 0, offset);
4755 w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
4756 w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
4757 w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
4758 w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
4759 w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
4760 w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
4761 w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
4762 w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
4763 w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
4764 w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
4765 w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
4766 w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
4767 w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
4768 w0[0] = amd_bytealign_S ( 0, w0[0], offset);
4769 break;
4770
4771 case 1:
4772 w3[2] = amd_bytealign_S (w3[0], 0, offset);
4773 w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
4774 w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
4775 w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
4776 w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
4777 w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
4778 w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
4779 w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
4780 w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
4781 w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
4782 w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
4783 w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
4784 w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
4785 w0[1] = amd_bytealign_S ( 0, w0[0], offset);
4786 w0[0] = 0;
4787 break;
4788
4789 case 2:
4790 w3[2] = amd_bytealign_S (w2[3], 0, offset);
4791 w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
4792 w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
4793 w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
4794 w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
4795 w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
4796 w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
4797 w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
4798 w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
4799 w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
4800 w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
4801 w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
4802 w0[2] = amd_bytealign_S ( 0, w0[0], offset);
4803 w0[1] = 0;
4804 w0[0] = 0;
4805 break;
4806
4807 case 3:
4808 w3[2] = amd_bytealign_S (w2[2], 0, offset);
4809 w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
4810 w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
4811 w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
4812 w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
4813 w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
4814 w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
4815 w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
4816 w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
4817 w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
4818 w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
4819 w0[3] = amd_bytealign_S ( 0, w0[0], offset);
4820 w0[2] = 0;
4821 w0[1] = 0;
4822 w0[0] = 0;
4823 break;
4824
4825 case 4:
4826 w3[2] = amd_bytealign_S (w2[1], 0, offset);
4827 w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
4828 w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
4829 w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
4830 w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
4831 w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
4832 w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
4833 w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
4834 w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
4835 w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
4836 w1[0] = amd_bytealign_S ( 0, w0[0], offset);
4837 w0[3] = 0;
4838 w0[2] = 0;
4839 w0[1] = 0;
4840 w0[0] = 0;
4841 break;
4842
4843 case 5:
4844 w3[2] = amd_bytealign_S (w2[0], 0, offset);
4845 w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
4846 w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
4847 w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
4848 w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
4849 w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
4850 w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
4851 w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
4852 w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
4853 w1[1] = amd_bytealign_S ( 0, w0[0], offset);
4854 w1[0] = 0;
4855 w0[3] = 0;
4856 w0[2] = 0;
4857 w0[1] = 0;
4858 w0[0] = 0;
4859 break;
4860
4861 case 6:
4862 w3[2] = amd_bytealign_S (w1[3], 0, offset);
4863 w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
4864 w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
4865 w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
4866 w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
4867 w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
4868 w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
4869 w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
4870 w1[2] = amd_bytealign_S ( 0, w0[0], offset);
4871 w1[1] = 0;
4872 w1[0] = 0;
4873 w0[3] = 0;
4874 w0[2] = 0;
4875 w0[1] = 0;
4876 w0[0] = 0;
4877 break;
4878
4879 case 7:
4880 w3[2] = amd_bytealign_S (w1[2], 0, offset);
4881 w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
4882 w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
4883 w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
4884 w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
4885 w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
4886 w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
4887 w1[3] = amd_bytealign_S ( 0, w0[0], offset);
4888 w1[2] = 0;
4889 w1[1] = 0;
4890 w1[0] = 0;
4891 w0[3] = 0;
4892 w0[2] = 0;
4893 w0[1] = 0;
4894 w0[0] = 0;
4895 break;
4896
4897 case 8:
4898 w3[2] = amd_bytealign_S (w1[1], 0, offset);
4899 w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
4900 w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
4901 w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
4902 w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
4903 w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
4904 w2[0] = amd_bytealign_S ( 0, w0[0], offset);
4905 w1[3] = 0;
4906 w1[2] = 0;
4907 w1[1] = 0;
4908 w1[0] = 0;
4909 w0[3] = 0;
4910 w0[2] = 0;
4911 w0[1] = 0;
4912 w0[0] = 0;
4913 break;
4914
4915 case 9:
4916 w3[2] = amd_bytealign_S (w1[0], 0, offset);
4917 w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
4918 w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
4919 w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
4920 w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
4921 w2[1] = amd_bytealign_S ( 0, w0[0], offset);
4922 w2[0] = 0;
4923 w1[3] = 0;
4924 w1[2] = 0;
4925 w1[1] = 0;
4926 w1[0] = 0;
4927 w0[3] = 0;
4928 w0[2] = 0;
4929 w0[1] = 0;
4930 w0[0] = 0;
4931 break;
4932
4933 case 10:
4934 w3[2] = amd_bytealign_S (w0[3], 0, offset);
4935 w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
4936 w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
4937 w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
4938 w2[2] = amd_bytealign_S ( 0, w0[0], offset);
4939 w2[1] = 0;
4940 w2[0] = 0;
4941 w1[3] = 0;
4942 w1[2] = 0;
4943 w1[1] = 0;
4944 w1[0] = 0;
4945 w0[3] = 0;
4946 w0[2] = 0;
4947 w0[1] = 0;
4948 w0[0] = 0;
4949 break;
4950
4951 case 11:
4952 w3[2] = amd_bytealign_S (w0[2], 0, offset);
4953 w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
4954 w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
4955 w2[3] = amd_bytealign_S ( 0, w0[0], offset);
4956 w2[2] = 0;
4957 w2[1] = 0;
4958 w2[0] = 0;
4959 w1[3] = 0;
4960 w1[2] = 0;
4961 w1[1] = 0;
4962 w1[0] = 0;
4963 w0[3] = 0;
4964 w0[2] = 0;
4965 w0[1] = 0;
4966 w0[0] = 0;
4967 break;
4968
4969 case 12:
4970 w3[2] = amd_bytealign_S (w0[1], 0, offset);
4971 w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
4972 w3[0] = amd_bytealign_S ( 0, w0[0], offset);
4973 w2[3] = 0;
4974 w2[2] = 0;
4975 w2[1] = 0;
4976 w2[0] = 0;
4977 w1[3] = 0;
4978 w1[2] = 0;
4979 w1[1] = 0;
4980 w1[0] = 0;
4981 w0[3] = 0;
4982 w0[2] = 0;
4983 w0[1] = 0;
4984 w0[0] = 0;
4985 break;
4986
4987 case 13:
4988 w3[2] = amd_bytealign_S (w0[0], 0, offset);
4989 w3[1] = amd_bytealign_S ( 0, w0[0], offset);
4990 w3[0] = 0;
4991 w2[3] = 0;
4992 w2[2] = 0;
4993 w2[1] = 0;
4994 w2[0] = 0;
4995 w1[3] = 0;
4996 w1[2] = 0;
4997 w1[1] = 0;
4998 w1[0] = 0;
4999 w0[3] = 0;
5000 w0[2] = 0;
5001 w0[1] = 0;
5002 w0[0] = 0;
5003 break;
5004 }
5005 #endif
5006
5007 #ifdef IS_NV
5008 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
5009
5010 switch (offset / 4)
5011 {
5012 case 0:
5013 w3[1] = __byte_perm_S (w3[1], w3[0], selector);
5014 w3[0] = __byte_perm_S (w3[0], w2[3], selector);
5015 w2[3] = __byte_perm_S (w2[3], w2[2], selector);
5016 w2[2] = __byte_perm_S (w2[2], w2[1], selector);
5017 w2[1] = __byte_perm_S (w2[1], w2[0], selector);
5018 w2[0] = __byte_perm_S (w2[0], w1[3], selector);
5019 w1[3] = __byte_perm_S (w1[3], w1[2], selector);
5020 w1[2] = __byte_perm_S (w1[2], w1[1], selector);
5021 w1[1] = __byte_perm_S (w1[1], w1[0], selector);
5022 w1[0] = __byte_perm_S (w1[0], w0[3], selector);
5023 w0[3] = __byte_perm_S (w0[3], w0[2], selector);
5024 w0[2] = __byte_perm_S (w0[2], w0[1], selector);
5025 w0[1] = __byte_perm_S (w0[1], w0[0], selector);
5026 w0[0] = __byte_perm_S (w0[0], 0, selector);
5027 break;
5028
5029 case 1:
5030 w3[1] = __byte_perm_S (w3[0], w2[3], selector);
5031 w3[0] = __byte_perm_S (w2[3], w2[2], selector);
5032 w2[3] = __byte_perm_S (w2[2], w2[1], selector);
5033 w2[2] = __byte_perm_S (w2[1], w2[0], selector);
5034 w2[1] = __byte_perm_S (w2[0], w1[3], selector);
5035 w2[0] = __byte_perm_S (w1[3], w1[2], selector);
5036 w1[3] = __byte_perm_S (w1[2], w1[1], selector);
5037 w1[2] = __byte_perm_S (w1[1], w1[0], selector);
5038 w1[1] = __byte_perm_S (w1[0], w0[3], selector);
5039 w1[0] = __byte_perm_S (w0[3], w0[2], selector);
5040 w0[3] = __byte_perm_S (w0[2], w0[1], selector);
5041 w0[2] = __byte_perm_S (w0[1], w0[0], selector);
5042 w0[1] = __byte_perm_S (w0[0], 0, selector);
5043 w0[0] = 0;
5044 break;
5045
5046 case 2:
5047 w3[1] = __byte_perm_S (w2[3], w2[2], selector);
5048 w3[0] = __byte_perm_S (w2[2], w2[1], selector);
5049 w2[3] = __byte_perm_S (w2[1], w2[0], selector);
5050 w2[2] = __byte_perm_S (w2[0], w1[3], selector);
5051 w2[1] = __byte_perm_S (w1[3], w1[2], selector);
5052 w2[0] = __byte_perm_S (w1[2], w1[1], selector);
5053 w1[3] = __byte_perm_S (w1[1], w1[0], selector);
5054 w1[2] = __byte_perm_S (w1[0], w0[3], selector);
5055 w1[1] = __byte_perm_S (w0[3], w0[2], selector);
5056 w1[0] = __byte_perm_S (w0[2], w0[1], selector);
5057 w0[3] = __byte_perm_S (w0[1], w0[0], selector);
5058 w0[2] = __byte_perm_S (w0[0], 0, selector);
5059 w0[1] = 0;
5060 w0[0] = 0;
5061 break;
5062
5063 case 3:
5064 w3[1] = __byte_perm_S (w2[2], w2[1], selector);
5065 w3[0] = __byte_perm_S (w2[1], w2[0], selector);
5066 w2[3] = __byte_perm_S (w2[0], w1[3], selector);
5067 w2[2] = __byte_perm_S (w1[3], w1[2], selector);
5068 w2[1] = __byte_perm_S (w1[2], w1[1], selector);
5069 w2[0] = __byte_perm_S (w1[1], w1[0], selector);
5070 w1[3] = __byte_perm_S (w1[0], w0[3], selector);
5071 w1[2] = __byte_perm_S (w0[3], w0[2], selector);
5072 w1[1] = __byte_perm_S (w0[2], w0[1], selector);
5073 w1[0] = __byte_perm_S (w0[1], w0[0], selector);
5074 w0[3] = __byte_perm_S (w0[0], 0, selector);
5075 w0[2] = 0;
5076 w0[1] = 0;
5077 w0[0] = 0;
5078 break;
5079
5080 case 4:
5081 w3[1] = __byte_perm_S (w2[1], w2[0], selector);
5082 w3[0] = __byte_perm_S (w2[0], w1[3], selector);
5083 w2[3] = __byte_perm_S (w1[3], w1[2], selector);
5084 w2[2] = __byte_perm_S (w1[2], w1[1], selector);
5085 w2[1] = __byte_perm_S (w1[1], w1[0], selector);
5086 w2[0] = __byte_perm_S (w1[0], w0[3], selector);
5087 w1[3] = __byte_perm_S (w0[3], w0[2], selector);
5088 w1[2] = __byte_perm_S (w0[2], w0[1], selector);
5089 w1[1] = __byte_perm_S (w0[1], w0[0], selector);
5090 w1[0] = __byte_perm_S (w0[0], 0, selector);
5091 w0[3] = 0;
5092 w0[2] = 0;
5093 w0[1] = 0;
5094 w0[0] = 0;
5095 break;
5096
5097 case 5:
5098 w3[1] = __byte_perm_S (w2[0], w1[3], selector);
5099 w3[0] = __byte_perm_S (w1[3], w1[2], selector);
5100 w2[3] = __byte_perm_S (w1[2], w1[1], selector);
5101 w2[2] = __byte_perm_S (w1[1], w1[0], selector);
5102 w2[1] = __byte_perm_S (w1[0], w0[3], selector);
5103 w2[0] = __byte_perm_S (w0[3], w0[2], selector);
5104 w1[3] = __byte_perm_S (w0[2], w0[1], selector);
5105 w1[2] = __byte_perm_S (w0[1], w0[0], selector);
5106 w1[1] = __byte_perm_S (w0[0], 0, selector);
5107 w1[0] = 0;
5108 w0[3] = 0;
5109 w0[2] = 0;
5110 w0[1] = 0;
5111 w0[0] = 0;
5112 break;
5113
5114 case 6:
5115 w3[1] = __byte_perm_S (w1[3], w1[2], selector);
5116 w3[0] = __byte_perm_S (w1[2], w1[1], selector);
5117 w2[3] = __byte_perm_S (w1[1], w1[0], selector);
5118 w2[2] = __byte_perm_S (w1[0], w0[3], selector);
5119 w2[1] = __byte_perm_S (w0[3], w0[2], selector);
5120 w2[0] = __byte_perm_S (w0[2], w0[1], selector);
5121 w1[3] = __byte_perm_S (w0[1], w0[0], selector);
5122 w1[2] = __byte_perm_S (w0[0], 0, selector);
5123 w1[1] = 0;
5124 w1[0] = 0;
5125 w0[3] = 0;
5126 w0[2] = 0;
5127 w0[1] = 0;
5128 w0[0] = 0;
5129 break;
5130
5131 case 7:
5132 w3[1] = __byte_perm_S (w1[2], w1[1], selector);
5133 w3[0] = __byte_perm_S (w1[1], w1[0], selector);
5134 w2[3] = __byte_perm_S (w1[0], w0[3], selector);
5135 w2[2] = __byte_perm_S (w0[3], w0[2], selector);
5136 w2[1] = __byte_perm_S (w0[2], w0[1], selector);
5137 w2[0] = __byte_perm_S (w0[1], w0[0], selector);
5138 w1[3] = __byte_perm_S (w0[0], 0, selector);
5139 w1[2] = 0;
5140 w1[1] = 0;
5141 w1[0] = 0;
5142 w0[3] = 0;
5143 w0[2] = 0;
5144 w0[1] = 0;
5145 w0[0] = 0;
5146 break;
5147
5148 case 8:
5149 w3[1] = __byte_perm_S (w1[1], w1[0], selector);
5150 w3[0] = __byte_perm_S (w1[0], w0[3], selector);
5151 w2[3] = __byte_perm_S (w0[3], w0[2], selector);
5152 w2[2] = __byte_perm_S (w0[2], w0[1], selector);
5153 w2[1] = __byte_perm_S (w0[1], w0[0], selector);
5154 w2[0] = __byte_perm_S (w0[0], 0, selector);
5155 w1[3] = 0;
5156 w1[2] = 0;
5157 w1[1] = 0;
5158 w1[0] = 0;
5159 w0[3] = 0;
5160 w0[2] = 0;
5161 w0[1] = 0;
5162 w0[0] = 0;
5163 break;
5164
5165 case 9:
5166 w3[1] = __byte_perm_S (w1[0], w0[3], selector);
5167 w3[0] = __byte_perm_S (w0[3], w0[2], selector);
5168 w2[3] = __byte_perm_S (w0[2], w0[1], selector);
5169 w2[2] = __byte_perm_S (w0[1], w0[0], selector);
5170 w2[1] = __byte_perm_S (w0[0], 0, selector);
5171 w2[0] = 0;
5172 w1[3] = 0;
5173 w1[2] = 0;
5174 w1[1] = 0;
5175 w1[0] = 0;
5176 w0[3] = 0;
5177 w0[2] = 0;
5178 w0[1] = 0;
5179 w0[0] = 0;
5180 break;
5181
5182 case 10:
5183 w3[1] = __byte_perm_S (w0[3], w0[2], selector);
5184 w3[0] = __byte_perm_S (w0[2], w0[1], selector);
5185 w2[3] = __byte_perm_S (w0[1], w0[0], selector);
5186 w2[2] = __byte_perm_S (w0[0], 0, selector);
5187 w2[1] = 0;
5188 w2[0] = 0;
5189 w1[3] = 0;
5190 w1[2] = 0;
5191 w1[1] = 0;
5192 w1[0] = 0;
5193 w0[3] = 0;
5194 w0[2] = 0;
5195 w0[1] = 0;
5196 w0[0] = 0;
5197 break;
5198
5199 case 11:
5200 w3[1] = __byte_perm_S (w0[2], w0[1], selector);
5201 w3[0] = __byte_perm_S (w0[1], w0[0], selector);
5202 w2[3] = __byte_perm_S (w0[0], 0, selector);
5203 w2[2] = 0;
5204 w2[1] = 0;
5205 w2[0] = 0;
5206 w1[3] = 0;
5207 w1[2] = 0;
5208 w1[1] = 0;
5209 w1[0] = 0;
5210 w0[3] = 0;
5211 w0[2] = 0;
5212 w0[1] = 0;
5213 w0[0] = 0;
5214 break;
5215
5216 case 12:
5217 w3[1] = __byte_perm_S (w0[1], w0[0], selector);
5218 w3[0] = __byte_perm_S (w0[0], 0, selector);
5219 w2[3] = 0;
5220 w2[2] = 0;
5221 w2[1] = 0;
5222 w2[0] = 0;
5223 w1[3] = 0;
5224 w1[2] = 0;
5225 w1[1] = 0;
5226 w1[0] = 0;
5227 w0[3] = 0;
5228 w0[2] = 0;
5229 w0[1] = 0;
5230 w0[0] = 0;
5231 break;
5232
5233 case 13:
5234 w3[1] = __byte_perm_S (w0[0], 0, selector);
5235 w3[0] = 0;
5236 w2[3] = 0;
5237 w2[2] = 0;
5238 w2[1] = 0;
5239 w2[0] = 0;
5240 w1[3] = 0;
5241 w1[2] = 0;
5242 w1[1] = 0;
5243 w1[0] = 0;
5244 w0[3] = 0;
5245 w0[2] = 0;
5246 w0[1] = 0;
5247 w0[0] = 0;
5248 break;
5249 }
5250 #endif
5251 }
5252
5253 static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
5254 {
5255 #if defined IS_AMD || defined IS_GENERIC
5256 const int offset_mod_4 = offset & 3;
5257
5258 const int offset_minus_4 = 4 - offset;
5259
5260 switch (offset / 4)
5261 {
5262 case 0:
5263 w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
5264 w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
5265 w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
5266 w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
5267 w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5268 w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5269 w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5270 w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5271 w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5272 w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5273 w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5274 w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5275 w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5276 w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5277 w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
5278
5279 if (offset_mod_4 == 0)
5280 {
5281 w0[0] = w0[1];
5282 w0[1] = w0[2];
5283 w0[2] = w0[3];
5284 w0[3] = w1[0];
5285 w1[0] = w1[1];
5286 w1[1] = w1[2];
5287 w1[2] = w1[3];
5288 w1[3] = w2[0];
5289 w2[0] = w2[1];
5290 w2[1] = w2[2];
5291 w2[2] = w2[3];
5292 w2[3] = w3[0];
5293 w3[0] = w3[1];
5294 w3[1] = w3[2];
5295 w3[2] = 0;
5296 }
5297
5298 break;
5299
5300 case 1:
5301 w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
5302 w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
5303 w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
5304 w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5305 w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5306 w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5307 w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5308 w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5309 w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5310 w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5311 w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5312 w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5313 w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5314 w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
5315 w0[0] = 0;
5316
5317 if (offset_mod_4 == 0)
5318 {
5319 w0[1] = w0[2];
5320 w0[2] = w0[3];
5321 w0[3] = w1[0];
5322 w1[0] = w1[1];
5323 w1[1] = w1[2];
5324 w1[2] = w1[3];
5325 w1[3] = w2[0];
5326 w2[0] = w2[1];
5327 w2[1] = w2[2];
5328 w2[2] = w2[3];
5329 w2[3] = w3[0];
5330 w3[0] = w3[1];
5331 w3[1] = w3[2];
5332 w3[2] = 0;
5333 }
5334
5335 break;
5336
5337 case 2:
5338 w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
5339 w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
5340 w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5341 w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5342 w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5343 w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5344 w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5345 w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5346 w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5347 w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5348 w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5349 w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5350 w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
5351 w0[1] = 0;
5352 w0[0] = 0;
5353
5354 if (offset_mod_4 == 0)
5355 {
5356 w0[2] = w0[3];
5357 w0[3] = w1[0];
5358 w1[0] = w1[1];
5359 w1[1] = w1[2];
5360 w1[2] = w1[3];
5361 w1[3] = w2[0];
5362 w2[0] = w2[1];
5363 w2[1] = w2[2];
5364 w2[2] = w2[3];
5365 w2[3] = w3[0];
5366 w3[0] = w3[1];
5367 w3[1] = w3[2];
5368 w3[2] = 0;
5369 }
5370
5371 break;
5372
5373 case 3:
5374 w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
5375 w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
5376 w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5377 w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5378 w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5379 w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5380 w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5381 w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5382 w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5383 w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5384 w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5385 w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
5386 w0[2] = 0;
5387 w0[1] = 0;
5388 w0[0] = 0;
5389
5390 if (offset_mod_4 == 0)
5391 {
5392 w0[3] = w1[0];
5393 w1[0] = w1[1];
5394 w1[1] = w1[2];
5395 w1[2] = w1[3];
5396 w1[3] = w2[0];
5397 w2[0] = w2[1];
5398 w2[1] = w2[2];
5399 w2[2] = w2[3];
5400 w2[3] = w3[0];
5401 w3[0] = w3[1];
5402 w3[1] = w3[2];
5403 w3[2] = 0;
5404 }
5405
5406 break;
5407
5408 case 4:
5409 w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
5410 w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
5411 w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5412 w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5413 w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5414 w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5415 w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5416 w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5417 w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5418 w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5419 w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
5420 w0[3] = 0;
5421 w0[2] = 0;
5422 w0[1] = 0;
5423 w0[0] = 0;
5424
5425 if (offset_mod_4 == 0)
5426 {
5427 w1[0] = w1[1];
5428 w1[1] = w1[2];
5429 w1[2] = w1[3];
5430 w1[3] = w2[0];
5431 w2[0] = w2[1];
5432 w2[1] = w2[2];
5433 w2[2] = w2[3];
5434 w2[3] = w3[0];
5435 w3[0] = w3[1];
5436 w3[1] = w3[2];
5437 w3[2] = 0;
5438 }
5439
5440 break;
5441
5442 case 5:
5443 w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
5444 w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
5445 w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5446 w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5447 w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5448 w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5449 w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5450 w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5451 w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5452 w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
5453 w1[0] = 0;
5454 w0[3] = 0;
5455 w0[2] = 0;
5456 w0[1] = 0;
5457 w0[0] = 0;
5458
5459 if (offset_mod_4 == 0)
5460 {
5461 w1[1] = w1[2];
5462 w1[2] = w1[3];
5463 w1[3] = w2[0];
5464 w2[0] = w2[1];
5465 w2[1] = w2[2];
5466 w2[2] = w2[3];
5467 w2[3] = w3[0];
5468 w3[0] = w3[1];
5469 w3[1] = w3[2];
5470 w3[2] = 0;
5471 }
5472
5473 break;
5474
5475 case 6:
5476 w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
5477 w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
5478 w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5479 w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5480 w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5481 w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5482 w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5483 w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5484 w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
5485 w1[1] = 0;
5486 w1[0] = 0;
5487 w0[3] = 0;
5488 w0[2] = 0;
5489 w0[1] = 0;
5490 w0[0] = 0;
5491
5492 if (offset_mod_4 == 0)
5493 {
5494 w1[2] = w1[3];
5495 w1[3] = w2[0];
5496 w2[0] = w2[1];
5497 w2[1] = w2[2];
5498 w2[2] = w2[3];
5499 w2[3] = w3[0];
5500 w3[0] = w3[1];
5501 w3[1] = w3[2];
5502 w3[2] = 0;
5503 }
5504
5505 break;
5506
5507 case 7:
5508 w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
5509 w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
5510 w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5511 w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5512 w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5513 w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5514 w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5515 w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
5516 w1[2] = 0;
5517 w1[1] = 0;
5518 w1[0] = 0;
5519 w0[3] = 0;
5520 w0[2] = 0;
5521 w0[1] = 0;
5522 w0[0] = 0;
5523
5524 if (offset_mod_4 == 0)
5525 {
5526 w1[3] = w2[0];
5527 w2[0] = w2[1];
5528 w2[1] = w2[2];
5529 w2[2] = w2[3];
5530 w2[3] = w3[0];
5531 w3[0] = w3[1];
5532 w3[1] = w3[2];
5533 w3[2] = 0;
5534 }
5535
5536 break;
5537
5538 case 8:
5539 w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
5540 w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
5541 w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5542 w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5543 w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5544 w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5545 w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
5546 w1[3] = 0;
5547 w1[2] = 0;
5548 w1[1] = 0;
5549 w1[0] = 0;
5550 w0[3] = 0;
5551 w0[2] = 0;
5552 w0[1] = 0;
5553 w0[0] = 0;
5554
5555 if (offset_mod_4 == 0)
5556 {
5557 w2[0] = w2[1];
5558 w2[1] = w2[2];
5559 w2[2] = w2[3];
5560 w2[3] = w3[0];
5561 w3[0] = w3[1];
5562 w3[1] = w3[2];
5563 w3[2] = 0;
5564 }
5565
5566 break;
5567
5568 case 9:
5569 w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
5570 w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
5571 w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5572 w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5573 w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5574 w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
5575 w2[0] = 0;
5576 w1[3] = 0;
5577 w1[2] = 0;
5578 w1[1] = 0;
5579 w1[0] = 0;
5580 w0[3] = 0;
5581 w0[2] = 0;
5582 w0[1] = 0;
5583 w0[0] = 0;
5584
5585 if (offset_mod_4 == 0)
5586 {
5587 w2[1] = w2[2];
5588 w2[2] = w2[3];
5589 w2[3] = w3[0];
5590 w3[0] = w3[1];
5591 w3[1] = w3[2];
5592 w3[2] = 0;
5593 }
5594
5595 break;
5596
5597 case 10:
5598 w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
5599 w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
5600 w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5601 w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5602 w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
5603 w2[1] = 0;
5604 w2[0] = 0;
5605 w1[3] = 0;
5606 w1[2] = 0;
5607 w1[1] = 0;
5608 w1[0] = 0;
5609 w0[3] = 0;
5610 w0[2] = 0;
5611 w0[1] = 0;
5612 w0[0] = 0;
5613
5614 if (offset_mod_4 == 0)
5615 {
5616 w2[2] = w2[3];
5617 w2[3] = w3[0];
5618 w3[0] = w3[1];
5619 w3[1] = w3[2];
5620 w3[2] = 0;
5621 }
5622
5623 break;
5624
5625 case 11:
5626 w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
5627 w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
5628 w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5629 w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
5630 w2[2] = 0;
5631 w2[1] = 0;
5632 w2[0] = 0;
5633 w1[3] = 0;
5634 w1[2] = 0;
5635 w1[1] = 0;
5636 w1[0] = 0;
5637 w0[3] = 0;
5638 w0[2] = 0;
5639 w0[1] = 0;
5640 w0[0] = 0;
5641
5642 if (offset_mod_4 == 0)
5643 {
5644 w2[3] = w3[0];
5645 w3[0] = w3[1];
5646 w3[1] = w3[2];
5647 w3[2] = 0;
5648 }
5649
5650 break;
5651
5652 case 12:
5653 w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
5654 w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
5655 w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
5656 w2[3] = 0;
5657 w2[2] = 0;
5658 w2[1] = 0;
5659 w2[0] = 0;
5660 w1[3] = 0;
5661 w1[2] = 0;
5662 w1[1] = 0;
5663 w1[0] = 0;
5664 w0[3] = 0;
5665 w0[2] = 0;
5666 w0[1] = 0;
5667 w0[0] = 0;
5668
5669 if (offset_mod_4 == 0)
5670 {
5671 w3[0] = w3[1];
5672 w3[1] = w3[2];
5673 w3[2] = 0;
5674 }
5675
5676 break;
5677
5678 case 13:
5679 w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
5680 w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
5681 w3[0] = 0;
5682 w2[3] = 0;
5683 w2[2] = 0;
5684 w2[1] = 0;
5685 w2[0] = 0;
5686 w1[3] = 0;
5687 w1[2] = 0;
5688 w1[1] = 0;
5689 w1[0] = 0;
5690 w0[3] = 0;
5691 w0[2] = 0;
5692 w0[1] = 0;
5693 w0[0] = 0;
5694
5695 if (offset_mod_4 == 0)
5696 {
5697 w3[1] = w3[2];
5698 w3[2] = 0;
5699 }
5700
5701 break;
5702 }
5703 #endif
5704
5705 #ifdef IS_NV
5706 const int offset_minus_4 = 4 - (offset % 4);
5707
5708 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
5709
5710 switch (offset / 4)
5711 {
5712 case 0:
5713 w3[1] = __byte_perm (w3[0], w3[1], selector);
5714 w3[0] = __byte_perm (w2[3], w3[0], selector);
5715 w2[3] = __byte_perm (w2[2], w2[3], selector);
5716 w2[2] = __byte_perm (w2[1], w2[2], selector);
5717 w2[1] = __byte_perm (w2[0], w2[1], selector);
5718 w2[0] = __byte_perm (w1[3], w2[0], selector);
5719 w1[3] = __byte_perm (w1[2], w1[3], selector);
5720 w1[2] = __byte_perm (w1[1], w1[2], selector);
5721 w1[1] = __byte_perm (w1[0], w1[1], selector);
5722 w1[0] = __byte_perm (w0[3], w1[0], selector);
5723 w0[3] = __byte_perm (w0[2], w0[3], selector);
5724 w0[2] = __byte_perm (w0[1], w0[2], selector);
5725 w0[1] = __byte_perm (w0[0], w0[1], selector);
5726 w0[0] = __byte_perm ( 0, w0[0], selector);
5727
5728 break;
5729
5730 case 1:
5731 w3[1] = __byte_perm (w2[3], w3[0], selector);
5732 w3[0] = __byte_perm (w2[2], w2[3], selector);
5733 w2[3] = __byte_perm (w2[1], w2[2], selector);
5734 w2[2] = __byte_perm (w2[0], w2[1], selector);
5735 w2[1] = __byte_perm (w1[3], w2[0], selector);
5736 w2[0] = __byte_perm (w1[2], w1[3], selector);
5737 w1[3] = __byte_perm (w1[1], w1[2], selector);
5738 w1[2] = __byte_perm (w1[0], w1[1], selector);
5739 w1[1] = __byte_perm (w0[3], w1[0], selector);
5740 w1[0] = __byte_perm (w0[2], w0[3], selector);
5741 w0[3] = __byte_perm (w0[1], w0[2], selector);
5742 w0[2] = __byte_perm (w0[0], w0[1], selector);
5743 w0[1] = __byte_perm ( 0, w0[0], selector);
5744 w0[0] = 0;
5745
5746 break;
5747
5748 case 2:
5749 w3[1] = __byte_perm (w2[2], w2[3], selector);
5750 w3[0] = __byte_perm (w2[1], w2[2], selector);
5751 w2[3] = __byte_perm (w2[0], w2[1], selector);
5752 w2[2] = __byte_perm (w1[3], w2[0], selector);
5753 w2[1] = __byte_perm (w1[2], w1[3], selector);
5754 w2[0] = __byte_perm (w1[1], w1[2], selector);
5755 w1[3] = __byte_perm (w1[0], w1[1], selector);
5756 w1[2] = __byte_perm (w0[3], w1[0], selector);
5757 w1[1] = __byte_perm (w0[2], w0[3], selector);
5758 w1[0] = __byte_perm (w0[1], w0[2], selector);
5759 w0[3] = __byte_perm (w0[0], w0[1], selector);
5760 w0[2] = __byte_perm ( 0, w0[0], selector);
5761 w0[1] = 0;
5762 w0[0] = 0;
5763
5764 break;
5765
5766 case 3:
5767 w3[1] = __byte_perm (w2[1], w2[2], selector);
5768 w3[0] = __byte_perm (w2[0], w2[1], selector);
5769 w2[3] = __byte_perm (w1[3], w2[0], selector);
5770 w2[2] = __byte_perm (w1[2], w1[3], selector);
5771 w2[1] = __byte_perm (w1[1], w1[2], selector);
5772 w2[0] = __byte_perm (w1[0], w1[1], selector);
5773 w1[3] = __byte_perm (w0[3], w1[0], selector);
5774 w1[2] = __byte_perm (w0[2], w0[3], selector);
5775 w1[1] = __byte_perm (w0[1], w0[2], selector);
5776 w1[0] = __byte_perm (w0[0], w0[1], selector);
5777 w0[3] = __byte_perm ( 0, w0[0], selector);
5778 w0[2] = 0;
5779 w0[1] = 0;
5780 w0[0] = 0;
5781
5782 break;
5783
5784 case 4:
5785 w3[1] = __byte_perm (w2[0], w2[1], selector);
5786 w3[0] = __byte_perm (w1[3], w2[0], selector);
5787 w2[3] = __byte_perm (w1[2], w1[3], selector);
5788 w2[2] = __byte_perm (w1[1], w1[2], selector);
5789 w2[1] = __byte_perm (w1[0], w1[1], selector);
5790 w2[0] = __byte_perm (w0[3], w1[0], selector);
5791 w1[3] = __byte_perm (w0[2], w0[3], selector);
5792 w1[2] = __byte_perm (w0[1], w0[2], selector);
5793 w1[1] = __byte_perm (w0[0], w0[1], selector);
5794 w1[0] = __byte_perm ( 0, w0[0], selector);
5795 w0[3] = 0;
5796 w0[2] = 0;
5797 w0[1] = 0;
5798 w0[0] = 0;
5799
5800 break;
5801
5802 case 5:
5803 w3[1] = __byte_perm (w1[3], w2[0], selector);
5804 w3[0] = __byte_perm (w1[2], w1[3], selector);
5805 w2[3] = __byte_perm (w1[1], w1[2], selector);
5806 w2[2] = __byte_perm (w1[0], w1[1], selector);
5807 w2[1] = __byte_perm (w0[3], w1[0], selector);
5808 w2[0] = __byte_perm (w0[2], w0[3], selector);
5809 w1[3] = __byte_perm (w0[1], w0[2], selector);
5810 w1[2] = __byte_perm (w0[0], w0[1], selector);
5811 w1[1] = __byte_perm ( 0, w0[0], selector);
5812 w1[0] = 0;
5813 w0[3] = 0;
5814 w0[2] = 0;
5815 w0[1] = 0;
5816 w0[0] = 0;
5817
5818 break;
5819
5820 case 6:
5821 w3[1] = __byte_perm (w1[2], w1[3], selector);
5822 w3[0] = __byte_perm (w1[1], w1[2], selector);
5823 w2[3] = __byte_perm (w1[0], w1[1], selector);
5824 w2[2] = __byte_perm (w0[3], w1[0], selector);
5825 w2[1] = __byte_perm (w0[2], w0[3], selector);
5826 w2[0] = __byte_perm (w0[1], w0[2], selector);
5827 w1[3] = __byte_perm (w0[0], w0[1], selector);
5828 w1[2] = __byte_perm ( 0, w0[0], selector);
5829 w1[1] = 0;
5830 w1[0] = 0;
5831 w0[3] = 0;
5832 w0[2] = 0;
5833 w0[1] = 0;
5834 w0[0] = 0;
5835
5836 break;
5837
5838 case 7:
5839 w3[1] = __byte_perm (w1[1], w1[2], selector);
5840 w3[0] = __byte_perm (w1[0], w1[1], selector);
5841 w2[3] = __byte_perm (w0[3], w1[0], selector);
5842 w2[2] = __byte_perm (w0[2], w0[3], selector);
5843 w2[1] = __byte_perm (w0[1], w0[2], selector);
5844 w2[0] = __byte_perm (w0[0], w0[1], selector);
5845 w1[3] = __byte_perm ( 0, w0[0], selector);
5846 w1[2] = 0;
5847 w1[1] = 0;
5848 w1[0] = 0;
5849 w0[3] = 0;
5850 w0[2] = 0;
5851 w0[1] = 0;
5852 w0[0] = 0;
5853
5854 break;
5855
5856 case 8:
5857 w3[1] = __byte_perm (w1[0], w1[1], selector);
5858 w3[0] = __byte_perm (w0[3], w1[0], selector);
5859 w2[3] = __byte_perm (w0[2], w0[3], selector);
5860 w2[2] = __byte_perm (w0[1], w0[2], selector);
5861 w2[1] = __byte_perm (w0[0], w0[1], selector);
5862 w2[0] = __byte_perm ( 0, w0[0], selector);
5863 w1[3] = 0;
5864 w1[2] = 0;
5865 w1[1] = 0;
5866 w1[0] = 0;
5867 w0[3] = 0;
5868 w0[2] = 0;
5869 w0[1] = 0;
5870 w0[0] = 0;
5871
5872 break;
5873
5874 case 9:
5875 w3[1] = __byte_perm (w0[3], w1[0], selector);
5876 w3[0] = __byte_perm (w0[2], w0[3], selector);
5877 w2[3] = __byte_perm (w0[1], w0[2], selector);
5878 w2[2] = __byte_perm (w0[0], w0[1], selector);
5879 w2[1] = __byte_perm ( 0, w0[0], selector);
5880 w2[0] = 0;
5881 w1[3] = 0;
5882 w1[2] = 0;
5883 w1[1] = 0;
5884 w1[0] = 0;
5885 w0[3] = 0;
5886 w0[2] = 0;
5887 w0[1] = 0;
5888 w0[0] = 0;
5889
5890 break;
5891
5892 case 10:
5893 w3[1] = __byte_perm (w0[2], w0[3], selector);
5894 w3[0] = __byte_perm (w0[1], w0[2], selector);
5895 w2[3] = __byte_perm (w0[0], w0[1], selector);
5896 w2[2] = __byte_perm ( 0, w0[0], selector);
5897 w2[1] = 0;
5898 w2[0] = 0;
5899 w1[3] = 0;
5900 w1[2] = 0;
5901 w1[1] = 0;
5902 w1[0] = 0;
5903 w0[3] = 0;
5904 w0[2] = 0;
5905 w0[1] = 0;
5906 w0[0] = 0;
5907
5908 break;
5909
5910 case 11:
5911 w3[1] = __byte_perm (w0[1], w0[2], selector);
5912 w3[0] = __byte_perm (w0[0], w0[1], selector);
5913 w2[3] = __byte_perm ( 0, w0[0], selector);
5914 w2[2] = 0;
5915 w2[1] = 0;
5916 w2[0] = 0;
5917 w1[3] = 0;
5918 w1[2] = 0;
5919 w1[1] = 0;
5920 w1[0] = 0;
5921 w0[3] = 0;
5922 w0[2] = 0;
5923 w0[1] = 0;
5924 w0[0] = 0;
5925
5926 break;
5927
5928 case 12:
5929 w3[1] = __byte_perm (w0[0], w0[1], selector);
5930 w3[0] = __byte_perm ( 0, w0[0], selector);
5931 w2[3] = 0;
5932 w2[2] = 0;
5933 w2[1] = 0;
5934 w2[0] = 0;
5935 w1[3] = 0;
5936 w1[2] = 0;
5937 w1[1] = 0;
5938 w1[0] = 0;
5939 w0[3] = 0;
5940 w0[2] = 0;
5941 w0[1] = 0;
5942 w0[0] = 0;
5943
5944 break;
5945
5946 case 13:
5947 w3[1] = __byte_perm ( 0, w0[0], selector);
5948 w3[0] = 0;
5949 w2[3] = 0;
5950 w2[2] = 0;
5951 w2[1] = 0;
5952 w2[0] = 0;
5953 w1[3] = 0;
5954 w1[2] = 0;
5955 w1[1] = 0;
5956 w1[0] = 0;
5957 w0[3] = 0;
5958 w0[2] = 0;
5959 w0[1] = 0;
5960 w0[0] = 0;
5961
5962 break;
5963 }
5964 #endif
5965 }
5966
5967 static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
5968 {
5969 #if defined IS_AMD || defined IS_GENERIC
5970 switch (offset / 4)
5971 {
5972 case 0:
5973 w3[2] = amd_bytealign (w3[1], 0, offset);
5974 w3[1] = amd_bytealign (w3[0], w3[1], offset);
5975 w3[0] = amd_bytealign (w2[3], w3[0], offset);
5976 w2[3] = amd_bytealign (w2[2], w2[3], offset);
5977 w2[2] = amd_bytealign (w2[1], w2[2], offset);
5978 w2[1] = amd_bytealign (w2[0], w2[1], offset);
5979 w2[0] = amd_bytealign (w1[3], w2[0], offset);
5980 w1[3] = amd_bytealign (w1[2], w1[3], offset);
5981 w1[2] = amd_bytealign (w1[1], w1[2], offset);
5982 w1[1] = amd_bytealign (w1[0], w1[1], offset);
5983 w1[0] = amd_bytealign (w0[3], w1[0], offset);
5984 w0[3] = amd_bytealign (w0[2], w0[3], offset);
5985 w0[2] = amd_bytealign (w0[1], w0[2], offset);
5986 w0[1] = amd_bytealign (w0[0], w0[1], offset);
5987 w0[0] = amd_bytealign ( 0, w0[0], offset);
5988 break;
5989
5990 case 1:
5991 w3[2] = amd_bytealign (w3[0], 0, offset);
5992 w3[1] = amd_bytealign (w2[3], w3[0], offset);
5993 w3[0] = amd_bytealign (w2[2], w2[3], offset);
5994 w2[3] = amd_bytealign (w2[1], w2[2], offset);
5995 w2[2] = amd_bytealign (w2[0], w2[1], offset);
5996 w2[1] = amd_bytealign (w1[3], w2[0], offset);
5997 w2[0] = amd_bytealign (w1[2], w1[3], offset);
5998 w1[3] = amd_bytealign (w1[1], w1[2], offset);
5999 w1[2] = amd_bytealign (w1[0], w1[1], offset);
6000 w1[1] = amd_bytealign (w0[3], w1[0], offset);
6001 w1[0] = amd_bytealign (w0[2], w0[3], offset);
6002 w0[3] = amd_bytealign (w0[1], w0[2], offset);
6003 w0[2] = amd_bytealign (w0[0], w0[1], offset);
6004 w0[1] = amd_bytealign ( 0, w0[0], offset);
6005 w0[0] = 0;
6006 break;
6007
6008 case 2:
6009 w3[2] = amd_bytealign (w2[3], 0, offset);
6010 w3[1] = amd_bytealign (w2[2], w2[3], offset);
6011 w3[0] = amd_bytealign (w2[1], w2[2], offset);
6012 w2[3] = amd_bytealign (w2[0], w2[1], offset);
6013 w2[2] = amd_bytealign (w1[3], w2[0], offset);
6014 w2[1] = amd_bytealign (w1[2], w1[3], offset);
6015 w2[0] = amd_bytealign (w1[1], w1[2], offset);
6016 w1[3] = amd_bytealign (w1[0], w1[1], offset);
6017 w1[2] = amd_bytealign (w0[3], w1[0], offset);
6018 w1[1] = amd_bytealign (w0[2], w0[3], offset);
6019 w1[0] = amd_bytealign (w0[1], w0[2], offset);
6020 w0[3] = amd_bytealign (w0[0], w0[1], offset);
6021 w0[2] = amd_bytealign ( 0, w0[0], offset);
6022 w0[1] = 0;
6023 w0[0] = 0;
6024 break;
6025
6026 case 3:
6027 w3[2] = amd_bytealign (w2[2], 0, offset);
6028 w3[1] = amd_bytealign (w2[1], w2[2], offset);
6029 w3[0] = amd_bytealign (w2[0], w2[1], offset);
6030 w2[3] = amd_bytealign (w1[3], w2[0], offset);
6031 w2[2] = amd_bytealign (w1[2], w1[3], offset);
6032 w2[1] = amd_bytealign (w1[1], w1[2], offset);
6033 w2[0] = amd_bytealign (w1[0], w1[1], offset);
6034 w1[3] = amd_bytealign (w0[3], w1[0], offset);
6035 w1[2] = amd_bytealign (w0[2], w0[3], offset);
6036 w1[1] = amd_bytealign (w0[1], w0[2], offset);
6037 w1[0] = amd_bytealign (w0[0], w0[1], offset);
6038 w0[3] = amd_bytealign ( 0, w0[0], offset);
6039 w0[2] = 0;
6040 w0[1] = 0;
6041 w0[0] = 0;
6042 break;
6043
6044 case 4:
6045 w3[2] = amd_bytealign (w2[1], 0, offset);
6046 w3[1] = amd_bytealign (w2[0], w2[1], offset);
6047 w3[0] = amd_bytealign (w1[3], w2[0], offset);
6048 w2[3] = amd_bytealign (w1[2], w1[3], offset);
6049 w2[2] = amd_bytealign (w1[1], w1[2], offset);
6050 w2[1] = amd_bytealign (w1[0], w1[1], offset);
6051 w2[0] = amd_bytealign (w0[3], w1[0], offset);
6052 w1[3] = amd_bytealign (w0[2], w0[3], offset);
6053 w1[2] = amd_bytealign (w0[1], w0[2], offset);
6054 w1[1] = amd_bytealign (w0[0], w0[1], offset);
6055 w1[0] = amd_bytealign ( 0, w0[0], offset);
6056 w0[3] = 0;
6057 w0[2] = 0;
6058 w0[1] = 0;
6059 w0[0] = 0;
6060 break;
6061
6062 case 5:
6063 w3[2] = amd_bytealign (w2[0], 0, offset);
6064 w3[1] = amd_bytealign (w1[3], w2[0], offset);
6065 w3[0] = amd_bytealign (w1[2], w1[3], offset);
6066 w2[3] = amd_bytealign (w1[1], w1[2], offset);
6067 w2[2] = amd_bytealign (w1[0], w1[1], offset);
6068 w2[1] = amd_bytealign (w0[3], w1[0], offset);
6069 w2[0] = amd_bytealign (w0[2], w0[3], offset);
6070 w1[3] = amd_bytealign (w0[1], w0[2], offset);
6071 w1[2] = amd_bytealign (w0[0], w0[1], offset);
6072 w1[1] = amd_bytealign ( 0, w0[0], offset);
6073 w1[0] = 0;
6074 w0[3] = 0;
6075 w0[2] = 0;
6076 w0[1] = 0;
6077 w0[0] = 0;
6078 break;
6079
6080 case 6:
6081 w3[2] = amd_bytealign (w1[3], 0, offset);
6082 w3[1] = amd_bytealign (w1[2], w1[3], offset);
6083 w3[0] = amd_bytealign (w1[1], w1[2], offset);
6084 w2[3] = amd_bytealign (w1[0], w1[1], offset);
6085 w2[2] = amd_bytealign (w0[3], w1[0], offset);
6086 w2[1] = amd_bytealign (w0[2], w0[3], offset);
6087 w2[0] = amd_bytealign (w0[1], w0[2], offset);
6088 w1[3] = amd_bytealign (w0[0], w0[1], offset);
6089 w1[2] = amd_bytealign ( 0, w0[0], offset);
6090 w1[1] = 0;
6091 w1[0] = 0;
6092 w0[3] = 0;
6093 w0[2] = 0;
6094 w0[1] = 0;
6095 w0[0] = 0;
6096 break;
6097
6098 case 7:
6099 w3[2] = amd_bytealign (w1[2], 0, offset);
6100 w3[1] = amd_bytealign (w1[1], w1[2], offset);
6101 w3[0] = amd_bytealign (w1[0], w1[1], offset);
6102 w2[3] = amd_bytealign (w0[3], w1[0], offset);
6103 w2[2] = amd_bytealign (w0[2], w0[3], offset);
6104 w2[1] = amd_bytealign (w0[1], w0[2], offset);
6105 w2[0] = amd_bytealign (w0[0], w0[1], offset);
6106 w1[3] = amd_bytealign ( 0, w0[0], offset);
6107 w1[2] = 0;
6108 w1[1] = 0;
6109 w1[0] = 0;
6110 w0[3] = 0;
6111 w0[2] = 0;
6112 w0[1] = 0;
6113 w0[0] = 0;
6114 break;
6115
6116 case 8:
6117 w3[2] = amd_bytealign (w1[1], 0, offset);
6118 w3[1] = amd_bytealign (w1[0], w1[1], offset);
6119 w3[0] = amd_bytealign (w0[3], w1[0], offset);
6120 w2[3] = amd_bytealign (w0[2], w0[3], offset);
6121 w2[2] = amd_bytealign (w0[1], w0[2], offset);
6122 w2[1] = amd_bytealign (w0[0], w0[1], offset);
6123 w2[0] = amd_bytealign ( 0, w0[0], offset);
6124 w1[3] = 0;
6125 w1[2] = 0;
6126 w1[1] = 0;
6127 w1[0] = 0;
6128 w0[3] = 0;
6129 w0[2] = 0;
6130 w0[1] = 0;
6131 w0[0] = 0;
6132 break;
6133
6134 case 9:
6135 w3[2] = amd_bytealign (w1[0], 0, offset);
6136 w3[1] = amd_bytealign (w0[3], w1[0], offset);
6137 w3[0] = amd_bytealign (w0[2], w0[3], offset);
6138 w2[3] = amd_bytealign (w0[1], w0[2], offset);
6139 w2[2] = amd_bytealign (w0[0], w0[1], offset);
6140 w2[1] = amd_bytealign ( 0, w0[0], offset);
6141 w2[0] = 0;
6142 w1[3] = 0;
6143 w1[2] = 0;
6144 w1[1] = 0;
6145 w1[0] = 0;
6146 w0[3] = 0;
6147 w0[2] = 0;
6148 w0[1] = 0;
6149 w0[0] = 0;
6150 break;
6151
6152 case 10:
6153 w3[2] = amd_bytealign (w0[3], 0, offset);
6154 w3[1] = amd_bytealign (w0[2], w0[3], offset);
6155 w3[0] = amd_bytealign (w0[1], w0[2], offset);
6156 w2[3] = amd_bytealign (w0[0], w0[1], offset);
6157 w2[2] = amd_bytealign ( 0, w0[0], offset);
6158 w2[1] = 0;
6159 w2[0] = 0;
6160 w1[3] = 0;
6161 w1[2] = 0;
6162 w1[1] = 0;
6163 w1[0] = 0;
6164 w0[3] = 0;
6165 w0[2] = 0;
6166 w0[1] = 0;
6167 w0[0] = 0;
6168 break;
6169
6170 case 11:
6171 w3[2] = amd_bytealign (w0[2], 0, offset);
6172 w3[1] = amd_bytealign (w0[1], w0[2], offset);
6173 w3[0] = amd_bytealign (w0[0], w0[1], offset);
6174 w2[3] = amd_bytealign ( 0, w0[0], offset);
6175 w2[2] = 0;
6176 w2[1] = 0;
6177 w2[0] = 0;
6178 w1[3] = 0;
6179 w1[2] = 0;
6180 w1[1] = 0;
6181 w1[0] = 0;
6182 w0[3] = 0;
6183 w0[2] = 0;
6184 w0[1] = 0;
6185 w0[0] = 0;
6186 break;
6187
6188 case 12:
6189 w3[2] = amd_bytealign (w0[1], 0, offset);
6190 w3[1] = amd_bytealign (w0[0], w0[1], offset);
6191 w3[0] = amd_bytealign ( 0, w0[0], offset);
6192 w2[3] = 0;
6193 w2[2] = 0;
6194 w2[1] = 0;
6195 w2[0] = 0;
6196 w1[3] = 0;
6197 w1[2] = 0;
6198 w1[1] = 0;
6199 w1[0] = 0;
6200 w0[3] = 0;
6201 w0[2] = 0;
6202 w0[1] = 0;
6203 w0[0] = 0;
6204 break;
6205
6206 case 13:
6207 w3[2] = amd_bytealign (w0[0], 0, offset);
6208 w3[1] = amd_bytealign ( 0, w0[0], offset);
6209 w3[0] = 0;
6210 w2[3] = 0;
6211 w2[2] = 0;
6212 w2[1] = 0;
6213 w2[0] = 0;
6214 w1[3] = 0;
6215 w1[2] = 0;
6216 w1[1] = 0;
6217 w1[0] = 0;
6218 w0[3] = 0;
6219 w0[2] = 0;
6220 w0[1] = 0;
6221 w0[0] = 0;
6222 break;
6223 }
6224 #endif
6225
6226 #ifdef IS_NV
6227 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
6228
6229 switch (offset / 4)
6230 {
6231 case 0:
6232 w3[1] = __byte_perm (w3[1], w3[0], selector);
6233 w3[0] = __byte_perm (w3[0], w2[3], selector);
6234 w2[3] = __byte_perm (w2[3], w2[2], selector);
6235 w2[2] = __byte_perm (w2[2], w2[1], selector);
6236 w2[1] = __byte_perm (w2[1], w2[0], selector);
6237 w2[0] = __byte_perm (w2[0], w1[3], selector);
6238 w1[3] = __byte_perm (w1[3], w1[2], selector);
6239 w1[2] = __byte_perm (w1[2], w1[1], selector);
6240 w1[1] = __byte_perm (w1[1], w1[0], selector);
6241 w1[0] = __byte_perm (w1[0], w0[3], selector);
6242 w0[3] = __byte_perm (w0[3], w0[2], selector);
6243 w0[2] = __byte_perm (w0[2], w0[1], selector);
6244 w0[1] = __byte_perm (w0[1], w0[0], selector);
6245 w0[0] = __byte_perm (w0[0], 0, selector);
6246 break;
6247
6248 case 1:
6249 w3[1] = __byte_perm (w3[0], w2[3], selector);
6250 w3[0] = __byte_perm (w2[3], w2[2], selector);
6251 w2[3] = __byte_perm (w2[2], w2[1], selector);
6252 w2[2] = __byte_perm (w2[1], w2[0], selector);
6253 w2[1] = __byte_perm (w2[0], w1[3], selector);
6254 w2[0] = __byte_perm (w1[3], w1[2], selector);
6255 w1[3] = __byte_perm (w1[2], w1[1], selector);
6256 w1[2] = __byte_perm (w1[1], w1[0], selector);
6257 w1[1] = __byte_perm (w1[0], w0[3], selector);
6258 w1[0] = __byte_perm (w0[3], w0[2], selector);
6259 w0[3] = __byte_perm (w0[2], w0[1], selector);
6260 w0[2] = __byte_perm (w0[1], w0[0], selector);
6261 w0[1] = __byte_perm (w0[0], 0, selector);
6262 w0[0] = 0;
6263 break;
6264
6265 case 2:
6266 w3[1] = __byte_perm (w2[3], w2[2], selector);
6267 w3[0] = __byte_perm (w2[2], w2[1], selector);
6268 w2[3] = __byte_perm (w2[1], w2[0], selector);
6269 w2[2] = __byte_perm (w2[0], w1[3], selector);
6270 w2[1] = __byte_perm (w1[3], w1[2], selector);
6271 w2[0] = __byte_perm (w1[2], w1[1], selector);
6272 w1[3] = __byte_perm (w1[1], w1[0], selector);
6273 w1[2] = __byte_perm (w1[0], w0[3], selector);
6274 w1[1] = __byte_perm (w0[3], w0[2], selector);
6275 w1[0] = __byte_perm (w0[2], w0[1], selector);
6276 w0[3] = __byte_perm (w0[1], w0[0], selector);
6277 w0[2] = __byte_perm (w0[0], 0, selector);
6278 w0[1] = 0;
6279 w0[0] = 0;
6280 break;
6281
6282 case 3:
6283 w3[1] = __byte_perm (w2[2], w2[1], selector);
6284 w3[0] = __byte_perm (w2[1], w2[0], selector);
6285 w2[3] = __byte_perm (w2[0], w1[3], selector);
6286 w2[2] = __byte_perm (w1[3], w1[2], selector);
6287 w2[1] = __byte_perm (w1[2], w1[1], selector);
6288 w2[0] = __byte_perm (w1[1], w1[0], selector);
6289 w1[3] = __byte_perm (w1[0], w0[3], selector);
6290 w1[2] = __byte_perm (w0[3], w0[2], selector);
6291 w1[1] = __byte_perm (w0[2], w0[1], selector);
6292 w1[0] = __byte_perm (w0[1], w0[0], selector);
6293 w0[3] = __byte_perm (w0[0], 0, selector);
6294 w0[2] = 0;
6295 w0[1] = 0;
6296 w0[0] = 0;
6297 break;
6298
6299 case 4:
6300 w3[1] = __byte_perm (w2[1], w2[0], selector);
6301 w3[0] = __byte_perm (w2[0], w1[3], selector);
6302 w2[3] = __byte_perm (w1[3], w1[2], selector);
6303 w2[2] = __byte_perm (w1[2], w1[1], selector);
6304 w2[1] = __byte_perm (w1[1], w1[0], selector);
6305 w2[0] = __byte_perm (w1[0], w0[3], selector);
6306 w1[3] = __byte_perm (w0[3], w0[2], selector);
6307 w1[2] = __byte_perm (w0[2], w0[1], selector);
6308 w1[1] = __byte_perm (w0[1], w0[0], selector);
6309 w1[0] = __byte_perm (w0[0], 0, selector);
6310 w0[3] = 0;
6311 w0[2] = 0;
6312 w0[1] = 0;
6313 w0[0] = 0;
6314 break;
6315
6316 case 5:
6317 w3[1] = __byte_perm (w2[0], w1[3], selector);
6318 w3[0] = __byte_perm (w1[3], w1[2], selector);
6319 w2[3] = __byte_perm (w1[2], w1[1], selector);
6320 w2[2] = __byte_perm (w1[1], w1[0], selector);
6321 w2[1] = __byte_perm (w1[0], w0[3], selector);
6322 w2[0] = __byte_perm (w0[3], w0[2], selector);
6323 w1[3] = __byte_perm (w0[2], w0[1], selector);
6324 w1[2] = __byte_perm (w0[1], w0[0], selector);
6325 w1[1] = __byte_perm (w0[0], 0, selector);
6326 w1[0] = 0;
6327 w0[3] = 0;
6328 w0[2] = 0;
6329 w0[1] = 0;
6330 w0[0] = 0;
6331 break;
6332
6333 case 6:
6334 w3[1] = __byte_perm (w1[3], w1[2], selector);
6335 w3[0] = __byte_perm (w1[2], w1[1], selector);
6336 w2[3] = __byte_perm (w1[1], w1[0], selector);
6337 w2[2] = __byte_perm (w1[0], w0[3], selector);
6338 w2[1] = __byte_perm (w0[3], w0[2], selector);
6339 w2[0] = __byte_perm (w0[2], w0[1], selector);
6340 w1[3] = __byte_perm (w0[1], w0[0], selector);
6341 w1[2] = __byte_perm (w0[0], 0, selector);
6342 w1[1] = 0;
6343 w1[0] = 0;
6344 w0[3] = 0;
6345 w0[2] = 0;
6346 w0[1] = 0;
6347 w0[0] = 0;
6348 break;
6349
6350 case 7:
6351 w3[1] = __byte_perm (w1[2], w1[1], selector);
6352 w3[0] = __byte_perm (w1[1], w1[0], selector);
6353 w2[3] = __byte_perm (w1[0], w0[3], selector);
6354 w2[2] = __byte_perm (w0[3], w0[2], selector);
6355 w2[1] = __byte_perm (w0[2], w0[1], selector);
6356 w2[0] = __byte_perm (w0[1], w0[0], selector);
6357 w1[3] = __byte_perm (w0[0], 0, selector);
6358 w1[2] = 0;
6359 w1[1] = 0;
6360 w1[0] = 0;
6361 w0[3] = 0;
6362 w0[2] = 0;
6363 w0[1] = 0;
6364 w0[0] = 0;
6365 break;
6366
6367 case 8:
6368 w3[1] = __byte_perm (w1[1], w1[0], selector);
6369 w3[0] = __byte_perm (w1[0], w0[3], selector);
6370 w2[3] = __byte_perm (w0[3], w0[2], selector);
6371 w2[2] = __byte_perm (w0[2], w0[1], selector);
6372 w2[1] = __byte_perm (w0[1], w0[0], selector);
6373 w2[0] = __byte_perm (w0[0], 0, selector);
6374 w1[3] = 0;
6375 w1[2] = 0;
6376 w1[1] = 0;
6377 w1[0] = 0;
6378 w0[3] = 0;
6379 w0[2] = 0;
6380 w0[1] = 0;
6381 w0[0] = 0;
6382 break;
6383
6384 case 9:
6385 w3[1] = __byte_perm (w1[0], w0[3], selector);
6386 w3[0] = __byte_perm (w0[3], w0[2], selector);
6387 w2[3] = __byte_perm (w0[2], w0[1], selector);
6388 w2[2] = __byte_perm (w0[1], w0[0], selector);
6389 w2[1] = __byte_perm (w0[0], 0, selector);
6390 w2[0] = 0;
6391 w1[3] = 0;
6392 w1[2] = 0;
6393 w1[1] = 0;
6394 w1[0] = 0;
6395 w0[3] = 0;
6396 w0[2] = 0;
6397 w0[1] = 0;
6398 w0[0] = 0;
6399 break;
6400
6401 case 10:
6402 w3[1] = __byte_perm (w0[3], w0[2], selector);
6403 w3[0] = __byte_perm (w0[2], w0[1], selector);
6404 w2[3] = __byte_perm (w0[1], w0[0], selector);
6405 w2[2] = __byte_perm (w0[0], 0, selector);
6406 w2[1] = 0;
6407 w2[0] = 0;
6408 w1[3] = 0;
6409 w1[2] = 0;
6410 w1[1] = 0;
6411 w1[0] = 0;
6412 w0[3] = 0;
6413 w0[2] = 0;
6414 w0[1] = 0;
6415 w0[0] = 0;
6416 break;
6417
6418 case 11:
6419 w3[1] = __byte_perm (w0[2], w0[1], selector);
6420 w3[0] = __byte_perm (w0[1], w0[0], selector);
6421 w2[3] = __byte_perm (w0[0], 0, selector);
6422 w2[2] = 0;
6423 w2[1] = 0;
6424 w2[0] = 0;
6425 w1[3] = 0;
6426 w1[2] = 0;
6427 w1[1] = 0;
6428 w1[0] = 0;
6429 w0[3] = 0;
6430 w0[2] = 0;
6431 w0[1] = 0;
6432 w0[0] = 0;
6433 break;
6434
6435 case 12:
6436 w3[1] = __byte_perm (w0[1], w0[0], selector);
6437 w3[0] = __byte_perm (w0[0], 0, selector);
6438 w2[3] = 0;
6439 w2[2] = 0;
6440 w2[1] = 0;
6441 w2[0] = 0;
6442 w1[3] = 0;
6443 w1[2] = 0;
6444 w1[1] = 0;
6445 w1[0] = 0;
6446 w0[3] = 0;
6447 w0[2] = 0;
6448 w0[1] = 0;
6449 w0[0] = 0;
6450 break;
6451
6452 case 13:
6453 w3[1] = __byte_perm (w0[0], 0, selector);
6454 w3[0] = 0;
6455 w2[3] = 0;
6456 w2[2] = 0;
6457 w2[1] = 0;
6458 w2[0] = 0;
6459 w1[3] = 0;
6460 w1[2] = 0;
6461 w1[1] = 0;
6462 w1[0] = 0;
6463 w0[3] = 0;
6464 w0[2] = 0;
6465 w0[1] = 0;
6466 w0[0] = 0;
6467 break;
6468 }
6469 #endif
6470 }
6471
6472 /* not needed anymore?
6473
// before: append_0x80_2_be
//
// ORs a single 0x80 byte into the 32-byte buffer formed by w0 followed by w1.
// `offset` selects the byte, counting from the most significant byte of each
// word (offset 0 -> 0x80000000 in w0[0], offset 3 -> 0x80 in w0[0]).
// Offsets above 31 leave both arrays untouched.
static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
{
  if (offset > 31) return;

  // byte 0 of a word is its top byte, byte 3 its bottom byte
  const u32 marker = 0x80u << ((3 - (offset & 3)) * 8);

  // offsets 0..15 land in w0, 16..31 in w1
  u32 *half = (offset < 16) ? w0 : w1;

  half[(offset >> 2) & 3] |= marker;
}
6608
// before: append_0x80_8
//
// Places a single 0x80 byte at byte position `offset` of w[], counting bytes
// from the least significant end of each word.
//
// When `offset` is a multiple of four the target word is overwritten with
// 0x80; for every other offset the byte is OR-ed into the existing word.
// Offsets of 128 and above leave w[] untouched.
static void append_0x80_1x32 (u32 w[32], const u32 offset)
{
  if (offset >= 128) return;

  const u32 idx  = offset >> 2;
  const u32 lane = offset & 3;

  if (lane == 0)
  {
    // word-aligned position: the whole word is replaced, not merged
    w[idx] = 0x80;
  }
  else
  {
    w[idx] |= 0x80u << (lane * 8);
  }
}
7127
// before: device_memcat2L
//
// Writes the words of src_r0 into dst0 starting at byte position `offset`
// (1..7), with bytes counted from the least significant end of each word.
//
// When `offset` is not word aligned, the word containing the join is rebuilt
// as src_l0[word] OR-ed with the shifted-up low part of src_r0[0]. Words of
// dst0 below the join are never written, and anything that would fall past
// dst0[1] is dropped. Offsets outside 1..7 do nothing.
static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2])
{
  if ((offset < 1) || (offset > 7)) return;

  const u32 lsh = (offset & 3) << 3;

  if (offset < 4)
  {
    // join inside word 0; offset is 1..3 so lsh is never zero here
    const u32 rsh = 32 - lsh;

    dst0[0] = src_l0[0] | src_r0[0] << lsh;
    dst0[1] = src_r0[0] >> rsh | src_r0[1] << lsh;
  }
  else if (lsh == 0)
  {
    // offset 4: word-aligned, plain copy
    dst0[1] = src_r0[0];
  }
  else
  {
    // join inside word 1
    dst0[1] = src_l0[1] | src_r0[0] << lsh;
  }
}
7165
// before: device_memcat4L
//
// Writes the words of src_r0 into dst0 starting at byte position `offset`
// (1..15), with bytes counted from the least significant end of each word.
//
// When `offset` is not word aligned, the word containing the join is rebuilt
// as src_l0[word] OR-ed with the shifted-up low part of src_r0[0], and every
// following word is stitched together from two neighbouring src_r0 words.
// Words of dst0 below the join are never written, and anything that would
// fall past dst0[3] is dropped. Offsets outside 1..15 do nothing.
static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
{
  if ((offset < 1) || (offset > 15)) return;

  const u32 first = offset >> 2;        // dst word that holds the join
  const u32 lsh   = (offset & 3) << 3;  // bit shift inside that word

  if (lsh == 0)
  {
    // word-aligned: straight copy, src_l0 is not consulted
    for (u32 i = first, j = 0; i < 4; i++, j++)
    {
      dst0[i] = src_r0[j];
    }

    return;
  }

  const u32 rsh = 32 - lsh;

  dst0[first] = src_l0[first] | src_r0[0] << lsh;

  for (u32 i = first + 1, j = 1; i < 4; i++, j++)
  {
    dst0[i] = src_r0[j - 1] >> rsh | src_r0[j] << lsh;
  }
}
7253
// before: device_memcat8L
//
// Writes the four words of src_r0 into the eight-word destination formed by
// dst0 followed by dst1, starting at byte position `offset` (1..31), with
// bytes counted from the least significant end of each word.
//
// When `offset` is not word aligned, the word containing the join is rebuilt
// from the matching word of src_l0/src_l1 OR-ed with the shifted-up low part
// of src_r0[0]; the following words are stitched from neighbouring src_r0
// words, and one extra word receives the leftover high bytes of src_r0[3].
// Destination words below the join or beyond the written span are never
// touched, and anything that would fall past dst1[3] is dropped.
// Offsets outside 1..31 do nothing.
static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
{
  if ((offset < 1) || (offset > 31)) return;

  const u32 first = offset >> 2;        // destination word (0..7) holding the join
  const u32 lsh   = (offset & 3) << 3;  // bit shift inside that word

  if (lsh == 0)
  {
    // word-aligned: straight copy, the left source is not consulted
    for (u32 i = first, j = 0; (i < 8) && (j < 4); i++, j++)
    {
      u32 *slot = (i < 4) ? &dst0[i] : &dst1[i - 4];

      *slot = src_r0[j];
    }

    return;
  }

  const u32 rsh = 32 - lsh;

  // join word: keep the left bytes, add the low part of src_r0[0]
  if (first < 4)
  {
    dst0[first] = src_l0[first] | src_r0[0] << lsh;
  }
  else
  {
    dst1[first - 4] = src_l1[first - 4] | src_r0[0] << lsh;
  }

  // words fully covered by src_r0
  for (u32 i = first + 1, j = 1; (i < 8) && (j < 4); i++, j++)
  {
    u32 *slot = (i < 4) ? &dst0[i] : &dst1[i - 4];

    *slot = src_r0[j - 1] >> rsh | src_r0[j] << lsh;
  }

  // leftover high bytes of the last source word, if there is still room
  const u32 spill = first + 4;

  if (spill < 8)
  {
    u32 *slot = (spill < 4) ? &dst0[spill] : &dst1[spill - 4];

    *slot = src_r0[3] >> rsh;
  }
}
7465
7466 // before: device_memcat12L
7467 static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
7468 {
7469 switch (offset)
7470 {
7471 case 1:
7472 dst0[0] = src_l0[0] | src_r0[0] << 8;
7473 dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
7474 dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
7475 dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
7476 dst1[0] = src_r0[3] >> 24;
7477 break;
7478
7479 case 2:
7480 dst0[0] = src_l0[0] | src_r0[0] << 16;
7481 dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
7482 dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
7483 dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
7484 dst1[0] = src_r0[3] >> 16;
7485 break;
7486
7487 case 3:
7488 dst0[0] = src_l0[0] | src_r0[0] << 24;
7489 dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
7490 dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
7491 dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
7492 dst1[0] = src_r0[3] >> 8;
7493 break;
7494
7495 case 4:
7496 dst0[1] = src_r0[0];
7497 dst0[2] = src_r0[1];
7498 dst0[3] = src_r0[2];
7499 dst1[0] = src_r0[3];
7500 break;
7501
7502 case 5:
7503 dst0[1] = src_l0[1] | src_r0[0] << 8;
7504 dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
7505 dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
7506 dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
7507 dst1[1] = src_r0[3] >> 24;
7508 break;
7509
7510 case 6:
7511 dst0[1] = src_l0[1] | src_r0[0] << 16;
7512 dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
7513 dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
7514 dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
7515 dst1[1] = src_r0[3] >> 16;
7516 break;
7517
7518 case 7:
7519 dst0[1] = src_l0[1] | src_r0[0] << 24;
7520 dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
7521 dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
7522 dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
7523 dst1[1] = src_r0[3] >> 8;
7524 break;
7525
7526 case 8:
7527 dst0[2] = src_r0[0];
7528 dst0[3] = src_r0[1];
7529 dst1[0] = src_r0[2];
7530 dst1[1] = src_r0[3];
7531 break;
7532
7533 case 9:
7534 dst0[2] = src_l0[2] | src_r0[0] << 8;
7535 dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
7536 dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
7537 dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
7538 dst1[2] = src_r0[3] >> 24;
7539 break;
7540
7541 case 10:
7542 dst0[2] = src_l0[2] | src_r0[0] << 16;
7543 dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
7544 dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
7545 dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
7546 dst1[2] = src_r0[3] >> 16;
7547 break;
7548
7549 case 11:
7550 dst0[2] = src_l0[2] | src_r0[0] << 24;
7551 dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
7552 dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
7553 dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
7554 dst1[2] = src_r0[3] >> 8;
7555 break;
7556
7557 case 12:
7558 dst0[3] = src_r0[0];
7559 dst1[0] = src_r0[1];
7560 dst1[1] = src_r0[2];
7561 dst1[2] = src_r0[3];
7562 break;
7563
7564 case 13:
7565 dst0[3] = src_l0[3] | src_r0[0] << 8;
7566 dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
7567 dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
7568 dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
7569 dst1[3] = src_r0[3] >> 24;
7570 break;
7571
7572 case 14:
7573 dst0[3] = src_l0[3] | src_r0[0] << 16;
7574 dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
7575 dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
7576 dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
7577 dst1[3] = src_r0[3] >> 16;
7578 break;
7579
7580 case 15:
7581 dst0[3] = src_l0[3] | src_r0[0] << 24;
7582 dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
7583 dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
7584 dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
7585 dst1[3] = src_r0[3] >> 8;
7586 break;
7587
7588 case 16:
7589 dst1[0] = src_r0[0];
7590 dst1[1] = src_r0[1];
7591 dst1[2] = src_r0[2];
7592 dst1[3] = src_r0[3];
7593 break;
7594
7595 case 17:
7596 dst1[0] = src_l1[0] | src_r0[0] << 8;
7597 dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
7598 dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
7599 dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
7600 dst2[0] = src_r0[3] >> 24;
7601 break;
7602
7603 case 18:
7604 dst1[0] = src_l1[0] | src_r0[0] << 16;
7605 dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
7606 dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
7607 dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
7608 dst2[0] = src_r0[3] >> 16;
7609 break;
7610
7611 case 19:
7612 dst1[0] = src_l1[0] | src_r0[0] << 24;
7613 dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
7614 dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
7615 dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
7616 dst2[0] = src_r0[3] >> 8;
7617 break;
7618
7619 case 20:
7620 dst1[1] = src_r0[0];
7621 dst1[2] = src_r0[1];
7622 dst1[3] = src_r0[2];
7623 dst2[0] = src_r0[3];
7624 break;
7625
7626 case 21:
7627 dst1[1] = src_l1[1] | src_r0[0] << 8;
7628 dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
7629 dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
7630 dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
7631 dst2[1] = src_r0[3] >> 24;
7632 break;
7633
7634 case 22:
7635 dst1[1] = src_l1[1] | src_r0[0] << 16;
7636 dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
7637 dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
7638 dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
7639 dst2[1] = src_r0[3] >> 16;
7640 break;
7641
7642 case 23:
7643 dst1[1] = src_l1[1] | src_r0[0] << 24;
7644 dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
7645 dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
7646 dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
7647 dst2[1] = src_r0[3] >> 8;
7648 break;
7649
7650 case 24:
7651 dst1[2] = src_r0[0];
7652 dst1[3] = src_r0[1];
7653 dst2[0] = src_r0[2];
7654 dst2[1] = src_r0[3];
7655 break;
7656
7657 case 25:
7658 dst1[2] = src_l1[2] | src_r0[0] << 8;
7659 dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
7660 dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
7661 dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
7662 dst2[2] = src_r0[3] >> 24;
7663 break;
7664
7665 case 26:
7666 dst1[2] = src_l1[2] | src_r0[0] << 16;
7667 dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
7668 dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
7669 dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
7670 dst2[2] = src_r0[3] >> 16;
7671 break;
7672
7673 case 27:
7674 dst1[2] = src_l1[2] | src_r0[0] << 24;
7675 dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
7676 dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
7677 dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
7678 dst2[2] = src_r0[3] >> 8;
7679 break;
7680
7681 case 28:
7682 dst1[3] = src_r0[0];
7683 dst2[0] = src_r0[1];
7684 dst2[1] = src_r0[2];
7685 dst2[2] = src_r0[3];
7686 break;
7687
7688 case 29:
7689 dst1[3] = src_l1[3] | src_r0[0] << 8;
7690 dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
7691 dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
7692 dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
7693 dst2[3] = src_r0[3] >> 24;
7694 break;
7695
7696 case 30:
7697 dst1[3] = src_l1[3] | src_r0[0] << 16;
7698 dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
7699 dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
7700 dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
7701 dst2[3] = src_r0[3] >> 16;
7702 break;
7703
7704 case 31:
7705 dst1[3] = src_l1[3] | src_r0[0] << 24;
7706 dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
7707 dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
7708 dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
7709 dst2[3] = src_r0[3] >> 8;
7710 break;
7711
7712 case 32:
7713 dst2[0] = src_r0[0];
7714 dst2[1] = src_r0[1];
7715 dst2[2] = src_r0[2];
7716 dst2[3] = src_r0[3];
7717 break;
7718
7719 case 33:
7720 dst2[0] = src_l2[0] | src_r0[0] << 8;
7721 dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
7722 dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
7723 dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
7724 break;
7725
7726 case 34:
7727 dst2[0] = src_l2[0] | src_r0[0] << 16;
7728 dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
7729 dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
7730 dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
7731 break;
7732
7733 case 35:
7734 dst2[0] = src_l2[0] | src_r0[0] << 24;
7735 dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
7736 dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
7737 dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
7738 break;
7739
7740 case 36:
7741 dst2[1] = src_r0[0];
7742 dst2[2] = src_r0[1];
7743 dst2[3] = src_r0[2];
7744 break;
7745
7746 case 37:
7747 dst2[1] = src_l2[1] | src_r0[0] << 8;
7748 dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
7749 dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
7750 break;
7751
7752 case 38:
7753 dst2[1] = src_l2[1] | src_r0[0] << 16;
7754 dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
7755 dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
7756 break;
7757
7758 case 39:
7759 dst2[1] = src_l2[1] | src_r0[0] << 24;
7760 dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
7761 dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
7762 break;
7763
7764 case 40:
7765 dst2[2] = src_r0[0];
7766 dst2[3] = src_r0[1];
7767 break;
7768
7769 case 41:
7770 dst2[2] = src_l2[2] | src_r0[0] << 8;
7771 dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
7772 break;
7773
7774 case 42:
7775 dst2[2] = src_l2[2] | src_r0[0] << 16;
7776 dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
7777 break;
7778
7779 case 43:
7780 dst2[2] = src_l2[2] | src_r0[0] << 24;
7781 dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
7782 break;
7783
7784 case 44:
7785 dst2[3] = src_r0[0];
7786 break;
7787
7788 case 45:
7789 dst2[3] = src_l2[3] | src_r0[0] << 8;
7790 break;
7791
7792 case 46:
7793 dst2[3] = src_l2[3] | src_r0[0] << 16;
7794 break;
7795
7796 case 47:
7797 dst2[3] = src_l2[3] | src_r0[0] << 24;
7798 break;
7799 }
7800 }
7801
7802 // before: device_memcat12L
7803 static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
7804 {
7805 switch (offset)
7806 {
7807 case 0:
7808 dst0[0] = src_r0[0];
7809 dst0[1] = src_r0[1];
7810 dst0[2] = src_r0[2];
7811 dst0[3] = src_r0[3];
7812 dst1[0] = src_r1[0];
7813 dst1[1] = src_r1[1];
7814 dst1[2] = src_r1[2];
7815 dst1[3] = src_r1[3];
7816 break;
7817
7818 case 1:
7819 dst0[0] = src_l0[0] | src_r0[0] << 8;
7820 dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
7821 dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
7822 dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
7823 dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
7824 dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
7825 dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
7826 dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8;
7827 dst2[0] = src_r1[3] >> 24;
7828 break;
7829
7830 case 2:
7831 dst0[0] = src_l0[0] | src_r0[0] << 16;
7832 dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
7833 dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
7834 dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
7835 dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
7836 dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
7837 dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
7838 dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16;
7839 dst2[0] = src_r1[3] >> 16;
7840 break;
7841
7842 case 3:
7843 dst0[0] = src_l0[0] | src_r0[0] << 24;
7844 dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
7845 dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
7846 dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
7847 dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
7848 dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
7849 dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
7850 dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24;
7851 dst2[0] = src_r1[3] >> 8;
7852 break;
7853
7854 case 4:
7855 dst0[1] = src_r0[0];
7856 dst0[2] = src_r0[1];
7857 dst0[3] = src_r0[2];
7858 dst1[0] = src_r0[3];
7859 dst1[1] = src_r1[0];
7860 dst1[2] = src_r1[1];
7861 dst1[3] = src_r1[2];
7862 dst2[0] = src_r1[3];
7863 break;
7864
7865 case 5:
7866 dst0[1] = src_l0[1] | src_r0[0] << 8;
7867 dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
7868 dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
7869 dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
7870 dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
7871 dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
7872 dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8;
7873 dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8;
7874 dst2[1] = src_r1[3] >> 24;
7875 break;
7876
7877 case 6:
7878 dst0[1] = src_l0[1] | src_r0[0] << 16;
7879 dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
7880 dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
7881 dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
7882 dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
7883 dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
7884 dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16;
7885 dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16;
7886 dst2[1] = src_r1[3] >> 16;
7887 break;
7888
7889 case 7:
7890 dst0[1] = src_l0[1] | src_r0[0] << 24;
7891 dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
7892 dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
7893 dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
7894 dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
7895 dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
7896 dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24;
7897 dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24;
7898 dst2[1] = src_r1[3] >> 8;
7899 break;
7900
7901 case 8:
7902 dst0[2] = src_r0[0];
7903 dst0[3] = src_r0[1];
7904 dst1[0] = src_r0[2];
7905 dst1[1] = src_r0[3];
7906 dst1[2] = src_r1[0];
7907 dst1[3] = src_r1[1];
7908 dst2[0] = src_r1[2];
7909 dst2[1] = src_r1[3];
7910 break;
7911
7912 case 9:
7913 dst0[2] = src_l0[2] | src_r0[0] << 8;
7914 dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
7915 dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
7916 dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
7917 dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
7918 dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8;
7919 dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8;
7920 dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8;
7921 dst2[2] = src_r1[3] >> 24;
7922 break;
7923
7924 case 10:
7925 dst0[2] = src_l0[2] | src_r0[0] << 16;
7926 dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
7927 dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
7928 dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
7929 dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
7930 dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16;
7931 dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16;
7932 dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16;
7933 dst2[2] = src_r1[3] >> 16;
7934 break;
7935
7936 case 11:
7937 dst0[2] = src_l0[2] | src_r0[0] << 24;
7938 dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
7939 dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
7940 dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
7941 dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
7942 dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24;
7943 dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24;
7944 dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24;
7945 dst2[2] = src_r1[3] >> 8;
7946 break;
7947
7948 case 12:
7949 dst0[3] = src_r0[0];
7950 dst1[0] = src_r0[1];
7951 dst1[1] = src_r0[2];
7952 dst1[2] = src_r0[3];
7953 dst1[3] = src_r1[0];
7954 dst2[0] = src_r1[1];
7955 dst2[1] = src_r1[2];
7956 dst2[2] = src_r1[3];
7957 break;
7958
7959 case 13:
7960 dst0[3] = src_l0[3] | src_r0[0] << 8;
7961 dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
7962 dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
7963 dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
7964 dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8;
7965 dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8;
7966 dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8;
7967 dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8;
7968 dst2[3] = src_r1[3] >> 24;
7969 break;
7970
7971 case 14:
7972 dst0[3] = src_l0[3] | src_r0[0] << 16;
7973 dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
7974 dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
7975 dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
7976 dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16;
7977 dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16;
7978 dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16;
7979 dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16;
7980 dst2[3] = src_r1[3] >> 16;
7981 break;
7982
7983 case 15:
7984 dst0[3] = src_l0[3] | src_r0[0] << 24;
7985 dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
7986 dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
7987 dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
7988 dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24;
7989 dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24;
7990 dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24;
7991 dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24;
7992 dst2[3] = src_r1[3] >> 8;
7993 break;
7994
7995 case 16:
7996 dst1[0] = src_r0[0];
7997 dst1[1] = src_r0[1];
7998 dst1[2] = src_r0[2];
7999 dst1[3] = src_r0[3];
8000 dst2[0] = src_r1[0];
8001 dst2[1] = src_r1[1];
8002 dst2[2] = src_r1[2];
8003 dst2[3] = src_r1[3];
8004 break;
8005
8006 case 17:
8007 dst1[0] = src_l1[0] | src_r0[0] << 8;
8008 dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
8009 dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
8010 dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
8011 dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8;
8012 dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8;
8013 dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8;
8014 dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8;
8015 break;
8016
8017 case 18:
8018 dst1[0] = src_l1[0] | src_r0[0] << 16;
8019 dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
8020 dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
8021 dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
8022 dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16;
8023 dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16;
8024 dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16;
8025 dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16;
8026 break;
8027
8028 case 19:
8029 dst1[0] = src_l1[0] | src_r0[0] << 24;
8030 dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
8031 dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
8032 dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
8033 dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24;
8034 dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24;
8035 dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24;
8036 dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24;
8037 break;
8038
8039 case 20:
8040 dst1[1] = src_r1[0];
8041 dst1[2] = src_r0[1];
8042 dst1[3] = src_r0[2];
8043 dst2[0] = src_r0[3];
8044 dst2[1] = src_r1[0];
8045 dst2[2] = src_r1[1];
8046 dst2[3] = src_r1[2];
8047 break;
8048
8049 case 21:
8050 dst1[1] = src_l1[1] | src_r0[0] << 8;
8051 dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
8052 dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
8053 dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
8054 dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8;
8055 dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8;
8056 dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8;
8057 break;
8058
8059 case 22:
8060 dst1[1] = src_l1[1] | src_r0[0] << 16;
8061 dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
8062 dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
8063 dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
8064 dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16;
8065 dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16;
8066 dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16;
8067 break;
8068
8069 case 23:
8070 dst1[1] = src_l1[1] | src_r0[0] << 24;
8071 dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
8072 dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
8073 dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
8074 dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24;
8075 dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24;
8076 dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24;
8077 break;
8078
8079 case 24:
8080 dst1[2] = src_r1[0];
8081 dst1[3] = src_r0[1];
8082 dst2[0] = src_r0[2];
8083 dst2[1] = src_r0[3];
8084 dst2[2] = src_r1[0];
8085 dst2[3] = src_r1[1];
8086 break;
8087
8088 case 25:
8089 dst1[2] = src_l1[2] | src_r0[0] << 8;
8090 dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
8091 dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
8092 dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
8093 dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8;
8094 dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8;
8095 break;
8096
8097 case 26:
8098 dst1[2] = src_l1[2] | src_r0[0] << 16;
8099 dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
8100 dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
8101 dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
8102 dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16;
8103 dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16;
8104 break;
8105
8106 case 27:
8107 dst1[2] = src_l1[2] | src_r0[0] << 24;
8108 dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
8109 dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
8110 dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
8111 dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24;
8112 dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24;
8113 break;
8114
8115 case 28:
8116 dst1[3] = src_r1[0];
8117 dst2[0] = src_r0[1];
8118 dst2[1] = src_r0[2];
8119 dst2[2] = src_r0[3];
8120 dst2[3] = src_r1[0];
8121 break;
8122
8123 case 29:
8124 dst1[3] = src_l1[3] | src_r0[0] << 8;
8125 dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
8126 dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
8127 dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
8128 dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8;
8129 break;
8130
8131 case 30:
8132 dst1[3] = src_l1[3] | src_r0[0] << 16;
8133 dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
8134 dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
8135 dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
8136 dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16;
8137 break;
8138
8139 case 31:
8140 dst1[3] = src_l1[3] | src_r0[0] << 24;
8141 dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
8142 dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
8143 dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
8144 dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24;
8145 break;
8146
8147 case 32:
8148 dst2[0] = src_r0[0];
8149 dst2[1] = src_r0[1];
8150 dst2[2] = src_r0[2];
8151 dst2[3] = src_r0[3];
8152 break;
8153
8154 case 33:
8155 dst2[0] = src_l2[0] | src_r0[0] << 8;
8156 dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
8157 dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
8158 dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
8159 break;
8160
8161 case 34:
8162 dst2[0] = src_l2[0] | src_r0[0] << 16;
8163 dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
8164 dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
8165 dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
8166 break;
8167
8168 case 35:
8169 dst2[0] = src_l2[0] | src_r0[0] << 24;
8170 dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
8171 dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
8172 dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
8173 break;
8174
8175 case 36:
8176 dst2[1] = src_r0[0];
8177 dst2[2] = src_r0[1];
8178 dst2[3] = src_r0[2];
8179 break;
8180
8181 case 37:
8182 dst2[1] = src_l2[1] | src_r0[0] << 8;
8183 dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
8184 dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
8185 break;
8186
8187 case 38:
8188 dst2[1] = src_l2[1] | src_r0[0] << 16;
8189 dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
8190 dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
8191 break;
8192
8193 case 39:
8194 dst2[1] = src_l2[1] | src_r0[0] << 24;
8195 dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
8196 dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
8197 break;
8198
8199 case 40:
8200 dst2[2] = src_r0[0];
8201 dst2[3] = src_r0[1];
8202 break;
8203
8204 case 41:
8205 dst2[2] = src_l2[2] | src_r0[0] << 8;
8206 dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
8207 break;
8208
8209 case 42:
8210 dst2[2] = src_l2[2] | src_r0[0] << 16;
8211 dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
8212 break;
8213
8214 case 43:
8215 dst2[2] = src_l2[2] | src_r0[0] << 24;
8216 dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
8217 break;
8218
8219 case 44:
8220 dst2[3] = src_r0[0];
8221 break;
8222
8223 case 45:
8224 dst2[3] = src_l2[3] | src_r0[0] << 8;
8225 break;
8226
8227 case 46:
8228 dst2[3] = src_l2[3] | src_r0[0] << 16;
8229 break;
8230
8231 case 47:
8232 dst2[3] = src_l2[3] | src_r0[0] << 24;
8233 break;
8234 }
8235 }
8236
8237 // before: memcat16_9
8238 static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
8239 {
8240 switch (offset)
8241 {
8242 case 0:
8243 w0[0] = append0[0];
8244 w0[1] = append0[1];
8245 w0[2] = append0[2];
8246 w0[3] = append0[3];
8247 w1[0] = append1[0];
8248 w1[1] = append1[1];
8249 w1[2] = append1[2];
8250 w1[3] = append1[3];
8251 w2[0] = append2[0];
8252 break;
8253
8254 case 1:
8255 w0[0] = w0[0] | append0[0] << 8;
8256 w0[1] = append0[0] >> 24 | append0[1] << 8;
8257 w0[2] = append0[1] >> 24 | append0[2] << 8;
8258 w0[3] = append0[2] >> 24 | append0[3] << 8;
8259 w1[0] = append0[3] >> 24 | append1[0] << 8;
8260 w1[1] = append1[0] >> 24 | append1[1] << 8;
8261 w1[2] = append1[1] >> 24 | append1[2] << 8;
8262 w1[3] = append1[2] >> 24 | append1[3] << 8;
8263 w2[0] = append1[3] >> 24 | append2[0] << 8;
8264 w2[1] = append2[0] >> 24;
8265 break;
8266
8267 case 2:
8268 w0[0] = w0[0] | append0[0] << 16;
8269 w0[1] = append0[0] >> 16 | append0[1] << 16;
8270 w0[2] = append0[1] >> 16 | append0[2] << 16;
8271 w0[3] = append0[2] >> 16 | append0[3] << 16;
8272 w1[0] = append0[3] >> 16 | append1[0] << 16;
8273 w1[1] = append1[0] >> 16 | append1[1] << 16;
8274 w1[2] = append1[1] >> 16 | append1[2] << 16;
8275 w1[3] = append1[2] >> 16 | append1[3] << 16;
8276 w2[0] = append1[3] >> 16 | append2[0] << 16;
8277 w2[1] = append2[0] >> 16;
8278 break;
8279
8280 case 3:
8281 w0[0] = w0[0] | append0[0] << 24;
8282 w0[1] = append0[0] >> 8 | append0[1] << 24;
8283 w0[2] = append0[1] >> 8 | append0[2] << 24;
8284 w0[3] = append0[2] >> 8 | append0[3] << 24;
8285 w1[0] = append0[3] >> 8 | append1[0] << 24;
8286 w1[1] = append1[0] >> 8 | append1[1] << 24;
8287 w1[2] = append1[1] >> 8 | append1[2] << 24;
8288 w1[3] = append1[2] >> 8 | append1[3] << 24;
8289 w2[0] = append1[3] >> 8 | append2[0] << 24;
8290 w2[1] = append2[0] >> 8;
8291 break;
8292
8293 case 4:
8294 w0[1] = append0[0];
8295 w0[2] = append0[1];
8296 w0[3] = append0[2];
8297 w1[0] = append0[3];
8298 w1[1] = append1[0];
8299 w1[2] = append1[1];
8300 w1[3] = append1[2];
8301 w2[0] = append1[3];
8302 w2[1] = append2[0];
8303 break;
8304
8305 case 5:
8306 w0[1] = w0[1] | append0[0] << 8;
8307 w0[2] = append0[0] >> 24 | append0[1] << 8;
8308 w0[3] = append0[1] >> 24 | append0[2] << 8;
8309 w1[0] = append0[2] >> 24 | append0[3] << 8;
8310 w1[1] = append0[3] >> 24 | append1[0] << 8;
8311 w1[2] = append1[0] >> 24 | append1[1] << 8;
8312 w1[3] = append1[1] >> 24 | append1[2] << 8;
8313 w2[0] = append1[2] >> 24 | append1[3] << 8;
8314 w2[1] = append1[3] >> 24 | append2[0] << 8;
8315 w2[2] = append2[0] >> 24;
8316 break;
8317
8318 case 6:
8319 w0[1] = w0[1] | append0[0] << 16;
8320 w0[2] = append0[0] >> 16 | append0[1] << 16;
8321 w0[3] = append0[1] >> 16 | append0[2] << 16;
8322 w1[0] = append0[2] >> 16 | append0[3] << 16;
8323 w1[1] = append0[3] >> 16 | append1[0] << 16;
8324 w1[2] = append1[0] >> 16 | append1[1] << 16;
8325 w1[3] = append1[1] >> 16 | append1[2] << 16;
8326 w2[0] = append1[2] >> 16 | append1[3] << 16;
8327 w2[1] = append1[3] >> 16 | append2[0] << 16;
8328 w2[2] = append2[0] >> 16;
8329 break;
8330
8331 case 7:
8332 w0[1] = w0[1] | append0[0] << 24;
8333 w0[2] = append0[0] >> 8 | append0[1] << 24;
8334 w0[3] = append0[1] >> 8 | append0[2] << 24;
8335 w1[0] = append0[2] >> 8 | append0[3] << 24;
8336 w1[1] = append0[3] >> 8 | append1[0] << 24;
8337 w1[2] = append1[0] >> 8 | append1[1] << 24;
8338 w1[3] = append1[1] >> 8 | append1[2] << 24;
8339 w2[0] = append1[2] >> 8 | append1[3] << 24;
8340 w2[1] = append1[3] >> 8 | append2[0] << 24;
8341 w2[2] = append2[0] >> 8;
8342 break;
8343
8344 case 8:
8345 w0[2] = append0[0];
8346 w0[3] = append0[1];
8347 w1[0] = append0[2];
8348 w1[1] = append0[3];
8349 w1[2] = append1[0];
8350 w1[3] = append1[1];
8351 w2[0] = append1[2];
8352 w2[1] = append1[3];
8353 w2[2] = append2[0];
8354 break;
8355
8356 case 9:
8357 w0[2] = w0[2] | append0[0] << 8;
8358 w0[3] = append0[0] >> 24 | append0[1] << 8;
8359 w1[0] = append0[1] >> 24 | append0[2] << 8;
8360 w1[1] = append0[2] >> 24 | append0[3] << 8;
8361 w1[2] = append0[3] >> 24 | append1[0] << 8;
8362 w1[3] = append1[0] >> 24 | append1[1] << 8;
8363 w2[0] = append1[1] >> 24 | append1[2] << 8;
8364 w2[1] = append1[2] >> 24 | append1[3] << 8;
8365 w2[2] = append1[3] >> 24 | append2[0] << 8;
8366 w2[3] = append2[0] >> 24;
8367 break;
8368
8369 case 10:
8370 w0[2] = w0[2] | append0[0] << 16;
8371 w0[3] = append0[0] >> 16 | append0[1] << 16;
8372 w1[0] = append0[1] >> 16 | append0[2] << 16;
8373 w1[1] = append0[2] >> 16 | append0[3] << 16;
8374 w1[2] = append0[3] >> 16 | append1[0] << 16;
8375 w1[3] = append1[0] >> 16 | append1[1] << 16;
8376 w2[0] = append1[1] >> 16 | append1[2] << 16;
8377 w2[1] = append1[2] >> 16 | append1[3] << 16;
8378 w2[2] = append1[3] >> 16 | append2[0] << 16;
8379 w2[3] = append2[0] >> 16;
8380 break;
8381
8382 case 11:
8383 w0[2] = w0[2] | append0[0] << 24;
8384 w0[3] = append0[0] >> 8 | append0[1] << 24;
8385 w1[0] = append0[1] >> 8 | append0[2] << 24;
8386 w1[1] = append0[2] >> 8 | append0[3] << 24;
8387 w1[2] = append0[3] >> 8 | append1[0] << 24;
8388 w1[3] = append1[0] >> 8 | append1[1] << 24;
8389 w2[0] = append1[1] >> 8 | append1[2] << 24;
8390 w2[1] = append1[2] >> 8 | append1[3] << 24;
8391 w2[2] = append1[3] >> 8 | append2[0] << 24;
8392 w2[3] = append2[0] >> 8;
8393 break;
8394
8395 case 12:
8396 w0[3] = append0[0];
8397 w1[0] = append0[1];
8398 w1[1] = append0[2];
8399 w1[2] = append0[3];
8400 w1[3] = append1[0];
8401 w2[0] = append1[1];
8402 w2[1] = append1[2];
8403 w2[2] = append1[3];
8404 w2[3] = append2[0];
8405 break;
8406
8407 case 13:
8408 w0[3] = w0[3] | append0[0] << 8;
8409 w1[0] = append0[0] >> 24 | append0[1] << 8;
8410 w1[1] = append0[1] >> 24 | append0[2] << 8;
8411 w1[2] = append0[2] >> 24 | append0[3] << 8;
8412 w1[3] = append0[3] >> 24 | append1[0] << 8;
8413 w2[0] = append1[0] >> 24 | append1[1] << 8;
8414 w2[1] = append1[1] >> 24 | append1[2] << 8;
8415 w2[2] = append1[2] >> 24 | append1[3] << 8;
8416 w2[3] = append1[3] >> 24 | append2[0] << 8;
8417 w3[0] = append2[0] >> 24;
8418 break;
8419
8420 case 14:
8421 w0[3] = w0[3] | append0[0] << 16;
8422 w1[0] = append0[0] >> 16 | append0[1] << 16;
8423 w1[1] = append0[1] >> 16 | append0[2] << 16;
8424 w1[2] = append0[2] >> 16 | append0[3] << 16;
8425 w1[3] = append0[3] >> 16 | append1[0] << 16;
8426 w2[0] = append1[0] >> 16 | append1[1] << 16;
8427 w2[1] = append1[1] >> 16 | append1[2] << 16;
8428 w2[2] = append1[2] >> 16 | append1[3] << 16;
8429 w2[3] = append1[3] >> 16 | append2[0] << 16;
8430 w3[0] = append2[0] >> 16;
8431 break;
8432
8433 case 15:
8434 w0[3] = w0[3] | append0[0] << 24;
8435 w1[0] = append0[0] >> 8 | append0[1] << 24;
8436 w1[1] = append0[1] >> 8 | append0[2] << 24;
8437 w1[2] = append0[2] >> 8 | append0[3] << 24;
8438 w1[3] = append0[3] >> 8 | append1[0] << 24;
8439 w2[0] = append1[0] >> 8 | append1[1] << 24;
8440 w2[1] = append1[1] >> 8 | append1[2] << 24;
8441 w2[2] = append1[2] >> 8 | append1[3] << 24;
8442 w2[3] = append1[3] >> 8 | append2[0] << 24;
8443 w3[0] = append2[0] >> 8;
8444 break;
8445 }
8446 }
8447
8448 // before: memcat32_8
8449 static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset)
8450 {
8451 switch (offset)
8452 {
8453 case 0:
8454 w0[0] = append0[0];
8455 w0[1] = append0[1];
8456 w0[2] = append0[2];
8457 w0[3] = append0[3];
8458 w1[0] = append1[0];
8459 w1[1] = append1[1];
8460 w1[2] = append1[2];
8461 w1[3] = append1[3];
8462 break;
8463
8464 case 1:
8465 w0[0] = w0[0] | append0[0] << 8;
8466 w0[1] = append0[0] >> 24 | append0[1] << 8;
8467 w0[2] = append0[1] >> 24 | append0[2] << 8;
8468 w0[3] = append0[2] >> 24 | append0[3] << 8;
8469 w1[0] = append0[3] >> 24 | append1[0] << 8;
8470 w1[1] = append1[0] >> 24 | append1[1] << 8;
8471 w1[2] = append1[1] >> 24 | append1[2] << 8;
8472 w1[3] = append1[2] >> 24 | append1[3] << 8;
8473 w2[0] = append1[3] >> 24;
8474 break;
8475
8476 case 2:
8477 w0[0] = w0[0] | append0[0] << 16;
8478 w0[1] = append0[0] >> 16 | append0[1] << 16;
8479 w0[2] = append0[1] >> 16 | append0[2] << 16;
8480 w0[3] = append0[2] >> 16 | append0[3] << 16;
8481 w1[0] = append0[3] >> 16 | append1[0] << 16;
8482 w1[1] = append1[0] >> 16 | append1[1] << 16;
8483 w1[2] = append1[1] >> 16 | append1[2] << 16;
8484 w1[3] = append1[2] >> 16 | append1[3] << 16;
8485 w2[0] = append1[3] >> 16;
8486 break;
8487
8488 case 3:
8489 w0[0] = w0[0] | append0[0] << 24;
8490 w0[1] = append0[0] >> 8 | append0[1] << 24;
8491 w0[2] = append0[1] >> 8 | append0[2] << 24;
8492 w0[3] = append0[2] >> 8 | append0[3] << 24;
8493 w1[0] = append0[3] >> 8 | append1[0] << 24;
8494 w1[1] = append1[0] >> 8 | append1[1] << 24;
8495 w1[2] = append1[1] >> 8 | append1[2] << 24;
8496 w1[3] = append1[2] >> 8 | append1[3] << 24;
8497 w2[0] = append1[3] >> 8;
8498 break;
8499
8500 case 4:
8501 w0[1] = append0[0];
8502 w0[2] = append0[1];
8503 w0[3] = append0[2];
8504 w1[0] = append0[3];
8505 w1[1] = append1[0];
8506 w1[2] = append1[1];
8507 w1[3] = append1[2];
8508 w2[0] = append1[3];
8509 break;
8510
8511 case 5:
8512 w0[1] = w0[1] | append0[0] << 8;
8513 w0[2] = append0[0] >> 24 | append0[1] << 8;
8514 w0[3] = append0[1] >> 24 | append0[2] << 8;
8515 w1[0] = append0[2] >> 24 | append0[3] << 8;
8516 w1[1] = append0[3] >> 24 | append1[0] << 8;
8517 w1[2] = append1[0] >> 24 | append1[1] << 8;
8518 w1[3] = append1[1] >> 24 | append1[2] << 8;
8519 w2[0] = append1[2] >> 24 | append1[3] << 8;
8520 w2[1] = append1[3] >> 24;
8521 break;
8522
8523 case 6:
8524 w0[1] = w0[1] | append0[0] << 16;
8525 w0[2] = append0[0] >> 16 | append0[1] << 16;
8526 w0[3] = append0[1] >> 16 | append0[2] << 16;
8527 w1[0] = append0[2] >> 16 | append0[3] << 16;
8528 w1[1] = append0[3] >> 16 | append1[0] << 16;
8529 w1[2] = append1[0] >> 16 | append1[1] << 16;
8530 w1[3] = append1[1] >> 16 | append1[2] << 16;
8531 w2[0] = append1[2] >> 16 | append1[3] << 16;
8532 w2[1] = append1[3] >> 16;
8533 break;
8534
8535 case 7:
8536 w0[1] = w0[1] | append0[0] << 24;
8537 w0[2] = append0[0] >> 8 | append0[1] << 24;
8538 w0[3] = append0[1] >> 8 | append0[2] << 24;
8539 w1[0] = append0[2] >> 8 | append0[3] << 24;
8540 w1[1] = append0[3] >> 8 | append1[0] << 24;
8541 w1[2] = append1[0] >> 8 | append1[1] << 24;
8542 w1[3] = append1[1] >> 8 | append1[2] << 24;
8543 w2[0] = append1[2] >> 8 | append1[3] << 24;
8544 w2[1] = append1[3] >> 8;
8545 break;
8546
8547 case 8:
8548 w0[2] = append0[0];
8549 w0[3] = append0[1];
8550 w1[0] = append0[2];
8551 w1[1] = append0[3];
8552 w1[2] = append1[0];
8553 w1[3] = append1[1];
8554 w2[0] = append1[2];
8555 w2[1] = append1[3];
8556 break;
8557
8558 case 9:
8559 w0[2] = w0[2] | append0[0] << 8;
8560 w0[3] = append0[0] >> 24 | append0[1] << 8;
8561 w1[0] = append0[1] >> 24 | append0[2] << 8;
8562 w1[1] = append0[2] >> 24 | append0[3] << 8;
8563 w1[2] = append0[3] >> 24 | append1[0] << 8;
8564 w1[3] = append1[0] >> 24 | append1[1] << 8;
8565 w2[0] = append1[1] >> 24 | append1[2] << 8;
8566 w2[1] = append1[2] >> 24 | append1[3] << 8;
8567 w2[2] = append1[3] >> 24;
8568 break;
8569
8570 case 10:
8571 w0[2] = w0[2] | append0[0] << 16;
8572 w0[3] = append0[0] >> 16 | append0[1] << 16;
8573 w1[0] = append0[1] >> 16 | append0[2] << 16;
8574 w1[1] = append0[2] >> 16 | append0[3] << 16;
8575 w1[2] = append0[3] >> 16 | append1[0] << 16;
8576 w1[3] = append1[0] >> 16 | append1[1] << 16;
8577 w2[0] = append1[1] >> 16 | append1[2] << 16;
8578 w2[1] = append1[2] >> 16 | append1[3] << 16;
8579 w2[2] = append1[3] >> 16;
8580 break;
8581
8582 case 11:
8583 w0[2] = w0[2] | append0[0] << 24;
8584 w0[3] = append0[0] >> 8 | append0[1] << 24;
8585 w1[0] = append0[1] >> 8 | append0[2] << 24;
8586 w1[1] = append0[2] >> 8 | append0[3] << 24;
8587 w1[2] = append0[3] >> 8 | append1[0] << 24;
8588 w1[3] = append1[0] >> 8 | append1[1] << 24;
8589 w2[0] = append1[1] >> 8 | append1[2] << 24;
8590 w2[1] = append1[2] >> 8 | append1[3] << 24;
8591 w2[2] = append1[3] >> 8;
8592 break;
8593
8594 case 12:
8595 w0[3] = append0[0];
8596 w1[0] = append0[1];
8597 w1[1] = append0[2];
8598 w1[2] = append0[3];
8599 w1[3] = append1[0];
8600 w2[0] = append1[1];
8601 w2[1] = append1[2];
8602 w2[2] = append1[3];
8603 break;
8604
8605 case 13:
8606 w0[3] = w0[3] | append0[0] << 8;
8607 w1[0] = append0[0] >> 24 | append0[1] << 8;
8608 w1[1] = append0[1] >> 24 | append0[2] << 8;
8609 w1[2] = append0[2] >> 24 | append0[3] << 8;
8610 w1[3] = append0[3] >> 24 | append1[0] << 8;
8611 w2[0] = append1[0] >> 24 | append1[1] << 8;
8612 w2[1] = append1[1] >> 24 | append1[2] << 8;
8613 w2[2] = append1[2] >> 24 | append1[3] << 8;
8614 w2[3] = append1[3] >> 24;
8615 break;
8616
8617 case 14:
8618 w0[3] = w0[3] | append0[0] << 16;
8619 w1[0] = append0[0] >> 16 | append0[1] << 16;
8620 w1[1] = append0[1] >> 16 | append0[2] << 16;
8621 w1[2] = append0[2] >> 16 | append0[3] << 16;
8622 w1[3] = append0[3] >> 16 | append1[0] << 16;
8623 w2[0] = append1[0] >> 16 | append1[1] << 16;
8624 w2[1] = append1[1] >> 16 | append1[2] << 16;
8625 w2[2] = append1[2] >> 16 | append1[3] << 16;
8626 w2[3] = append1[3] >> 16;
8627 break;
8628
8629 case 15:
8630 w0[3] = w0[3] | append0[0] << 24;
8631 w1[0] = append0[0] >> 8 | append0[1] << 24;
8632 w1[1] = append0[1] >> 8 | append0[2] << 24;
8633 w1[2] = append0[2] >> 8 | append0[3] << 24;
8634 w1[3] = append0[3] >> 8 | append1[0] << 24;
8635 w2[0] = append1[0] >> 8 | append1[1] << 24;
8636 w2[1] = append1[1] >> 8 | append1[2] << 24;
8637 w2[2] = append1[2] >> 8 | append1[3] << 24;
8638 w2[3] = append1[3] >> 8;
8639 break;
8640
8641 case 16:
8642 w1[0] = append0[0];
8643 w1[1] = append0[1];
8644 w1[2] = append0[2];
8645 w1[3] = append0[3];
8646 w2[0] = append1[0];
8647 w2[1] = append1[1];
8648 w2[2] = append1[2];
8649 w2[3] = append1[3];
8650 break;
8651
8652 case 17:
8653 w1[0] = w1[0] | append0[0] << 8;
8654 w1[1] = append0[0] >> 24 | append0[1] << 8;
8655 w1[2] = append0[1] >> 24 | append0[2] << 8;
8656 w1[3] = append0[2] >> 24 | append0[3] << 8;
8657 w2[0] = append0[3] >> 24 | append1[0] << 8;
8658 w2[1] = append1[0] >> 24 | append1[1] << 8;
8659 w2[2] = append1[1] >> 24 | append1[2] << 8;
8660 w2[3] = append1[2] >> 24 | append1[3] << 8;
8661 w3[0] = append1[3] >> 24;
8662 break;
8663
8664 case 18:
8665 w1[0] = w1[0] | append0[0] << 16;
8666 w1[1] = append0[0] >> 16 | append0[1] << 16;
8667 w1[2] = append0[1] >> 16 | append0[2] << 16;
8668 w1[3] = append0[2] >> 16 | append0[3] << 16;
8669 w2[0] = append0[3] >> 16 | append1[0] << 16;
8670 w2[1] = append1[0] >> 16 | append1[1] << 16;
8671 w2[2] = append1[1] >> 16 | append1[2] << 16;
8672 w2[3] = append1[2] >> 16 | append1[3] << 16;
8673 w3[0] = append1[3] >> 16;
8674 break;
8675
8676 case 19:
8677 w1[0] = w1[0] | append0[0] << 24;
8678 w1[1] = append0[0] >> 8 | append0[1] << 24;
8679 w1[2] = append0[1] >> 8 | append0[2] << 24;
8680 w1[3] = append0[2] >> 8 | append0[3] << 24;
8681 w2[0] = append0[3] >> 8 | append1[0] << 24;
8682 w2[1] = append1[0] >> 8 | append1[1] << 24;
8683 w2[2] = append1[1] >> 8 | append1[2] << 24;
8684 w2[3] = append1[2] >> 8 | append1[3] << 24;
8685 w3[0] = append1[3] >> 8;
8686 break;
8687
8688 case 20:
8689 w1[1] = append0[0];
8690 w1[2] = append0[1];
8691 w1[3] = append0[2];
8692 w2[0] = append0[3];
8693 w2[1] = append1[0];
8694 w2[2] = append1[1];
8695 w2[3] = append1[2];
8696 w3[0] = append1[3];
8697 break;
8698
8699 case 21:
8700 w1[1] = w1[1] | append0[0] << 8;
8701 w1[2] = append0[0] >> 24 | append0[1] << 8;
8702 w1[3] = append0[1] >> 24 | append0[2] << 8;
8703 w2[0] = append0[2] >> 24 | append0[3] << 8;
8704 w2[1] = append0[3] >> 24 | append1[0] << 8;
8705 w2[2] = append1[0] >> 24 | append1[1] << 8;
8706 w2[3] = append1[1] >> 24 | append1[2] << 8;
8707 w3[0] = append1[2] >> 24 | append1[3] << 8;
8708 w3[1] = append1[3] >> 24;
8709 break;
8710
8711 case 22:
8712 w1[1] = w1[1] | append0[0] << 16;
8713 w1[2] = append0[0] >> 16 | append0[1] << 16;
8714 w1[3] = append0[1] >> 16 | append0[2] << 16;
8715 w2[0] = append0[2] >> 16 | append0[3] << 16;
8716 w2[1] = append0[3] >> 16 | append1[0] << 16;
8717 w2[2] = append1[0] >> 16 | append1[1] << 16;
8718 w2[3] = append1[1] >> 16 | append1[2] << 16;
8719 w3[0] = append1[2] >> 16 | append1[3] << 16;
8720 w3[1] = append1[3] >> 16;
8721 break;
8722
8723 case 23:
8724 w1[1] = w1[1] | append0[0] << 24;
8725 w1[2] = append0[0] >> 8 | append0[1] << 24;
8726 w1[3] = append0[1] >> 8 | append0[2] << 24;
8727 w2[0] = append0[2] >> 8 | append0[3] << 24;
8728 w2[1] = append0[3] >> 8 | append1[0] << 24;
8729 w2[2] = append1[0] >> 8 | append1[1] << 24;
8730 w2[3] = append1[1] >> 8 | append1[2] << 24;
8731 w3[0] = append1[2] >> 8 | append1[3] << 24;
8732 w3[1] = append1[3] >> 8;
8733 break;
8734
8735 case 24:
8736 w1[2] = append0[0];
8737 w1[3] = append0[1];
8738 w2[0] = append0[2];
8739 w2[1] = append0[3];
8740 w2[2] = append1[0];
8741 w2[3] = append1[1];
8742 w3[0] = append1[2];
8743 w3[1] = append1[3];
8744 break;
8745
8746 case 25:
8747 w1[2] = w1[2] | append0[0] << 8;
8748 w1[3] = append0[0] >> 24 | append0[1] << 8;
8749 w2[0] = append0[1] >> 24 | append0[2] << 8;
8750 w2[1] = append0[2] >> 24 | append0[3] << 8;
8751 w2[2] = append0[3] >> 24 | append1[0] << 8;
8752 w2[3] = append1[0] >> 24 | append1[1] << 8;
8753 w3[0] = append1[1] >> 24 | append1[2] << 8;
8754 w3[1] = append1[2] >> 24 | append1[3] << 8;
8755 break;
8756
8757 case 26:
8758 w1[2] = w1[2] | append0[0] << 16;
8759 w1[3] = append0[0] >> 16 | append0[1] << 16;
8760 w2[0] = append0[1] >> 16 | append0[2] << 16;
8761 w2[1] = append0[2] >> 16 | append0[3] << 16;
8762 w2[2] = append0[3] >> 16 | append1[0] << 16;
8763 w2[3] = append1[0] >> 16 | append1[1] << 16;
8764 w3[0] = append1[1] >> 16 | append1[2] << 16;
8765 w3[1] = append1[2] >> 16 | append1[3] << 16;
8766 break;
8767
8768 case 27:
8769 w1[2] = w1[2] | append0[0] << 24;
8770 w1[3] = append0[0] >> 8 | append0[1] << 24;
8771 w2[0] = append0[1] >> 8 | append0[2] << 24;
8772 w2[1] = append0[2] >> 8 | append0[3] << 24;
8773 w2[2] = append0[3] >> 8 | append1[0] << 24;
8774 w2[3] = append1[0] >> 8 | append1[1] << 24;
8775 w3[0] = append1[1] >> 8 | append1[2] << 24;
8776 w3[1] = append1[2] >> 8 | append1[3] << 24;
8777 break;
8778
8779 case 28:
8780 w1[3] = append0[0];
8781 w2[0] = append0[1];
8782 w2[1] = append0[2];
8783 w2[2] = append0[3];
8784 w2[3] = append1[0];
8785 w3[0] = append1[1];
8786 w3[1] = append1[2];
8787 break;
8788
8789 case 29:
8790 w1[3] = w1[3] | append0[0] << 8;
8791 w2[0] = append0[0] >> 24 | append0[1] << 8;
8792 w2[1] = append0[1] >> 24 | append0[2] << 8;
8793 w2[2] = append0[2] >> 24 | append0[3] << 8;
8794 w2[3] = append0[3] >> 24 | append1[0] << 8;
8795 w3[0] = append1[0] >> 24 | append1[1] << 8;
8796 w3[1] = append1[1] >> 24 | append1[2] << 8;
8797 break;
8798
8799 case 30:
8800 w1[3] = w1[3] | append0[0] << 16;
8801 w2[0] = append0[0] >> 16 | append0[1] << 16;
8802 w2[1] = append0[1] >> 16 | append0[2] << 16;
8803 w2[2] = append0[2] >> 16 | append0[3] << 16;
8804 w2[3] = append0[3] >> 16 | append1[0] << 16;
8805 w3[0] = append1[0] >> 16 | append1[1] << 16;
8806 w3[1] = append1[1] >> 16 | append1[2] << 16;
8807 break;
8808
8809 case 31:
8810 w1[3] = w1[3] | append0[0] << 24;
8811 w2[0] = append0[0] >> 8 | append0[1] << 24;
8812 w2[1] = append0[1] >> 8 | append0[2] << 24;
8813 w2[2] = append0[2] >> 8 | append0[3] << 24;
8814 w2[3] = append0[3] >> 8 | append1[0] << 24;
8815 w3[0] = append1[0] >> 8 | append1[1] << 24;
8816 w3[1] = append1[1] >> 8 | append1[2] << 24;
8817 break;
8818
8819 case 32:
8820 w2[0] = append0[0];
8821 w2[1] = append0[1];
8822 w2[2] = append0[2];
8823 w2[3] = append0[3];
8824 w3[0] = append1[0];
8825 w3[1] = append1[1];
8826 break;
8827 }
8828 }
8829
8830 // before: memcat32_9
// Append nine 32-bit words -- append0[0..3], append1[0..3] and append2[0] --
// to the buffer formed by w0, w1, w2, w3 (treated as 16 consecutive words),
// starting at byte position `offset`. Byte k of a word is bits 8k..8k+7.
//
// - offset is a multiple of 4: the source words are copied as-is.
// - otherwise: the word containing `offset` is OR-ed with the shifted first
//   source word (its existing bits are kept); every following word is
//   overwritten with the source stream shifted across the word boundary.
// - nothing is stored beyond w3[1]; source data that would land in w3[2] or
//   w3[3] is dropped.
// - offset > 32: no word is modified.
//
// append2[1..3] are never read.
//
// NOTE(review): the OR into the first word only yields a clean concatenation
// if the bytes at and above `offset` in that word are zero -- presumably the
// caller guarantees this; confirm.
// NOTE(review): w3[2] and w3[3] look reserved for the caller (possibly a
// length field) -- confirm before relying on the truncation.
// NOTE(review): a block-comment terminator follows this definition, so it
// appears to live in a commented-out region; keep to line comments here.
static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
{
  if (offset > 32) return;

  // View the four destination arrays and the three source arrays as flat
  // word sequences: flat index i lives at table[i >> 2][i & 3].
  u32 *dst[4] = { w0, w1, w2, w3 };

  const u32 *src[3] = { append0, append1, append2 };

  const u32 src_cnt  = 9;  // append0[0..3], append1[0..3], append2[0]
  const u32 dst_last = 13; // flat index of w3[1], the last word ever written

  const u32 first = offset >> 2;       // destination word containing `offset`
  const u32 lsh   = (offset & 3) << 3; // bit position of `offset` in that word

  if (lsh == 0)
  {
    // Word-aligned: straight copy, cut off after w3[1].
    for (u32 i = 0; i < src_cnt; i++)
    {
      const u32 d = first + i;

      if (d > dst_last) break;

      dst[d >> 2][d & 3] = src[i >> 2][i & 3];
    }

    return;
  }

  const u32 rsh = 32 - lsh;

  // The first word keeps what it already holds and gains the low bytes of
  // the first source word.
  dst[first >> 2][first & 3] = dst[first >> 2][first & 3] | src[0][0] << lsh;

  // Each following word is stitched from the high bytes of the previous
  // source word and the low bytes of the current one.
  for (u32 i = 1; i < src_cnt; i++)
  {
    const u32 d = first + i;

    if (d > dst_last) return;

    const u32 p = i - 1;

    dst[d >> 2][d & 3] = src[p >> 2][p & 3] >> rsh | src[i >> 2][i & 3] << lsh;
  }

  // Left-over high bytes of the last source word, if they still fit.
  const u32 tail = first + src_cnt;

  if (tail <= dst_last)
  {
    dst[tail >> 2][tail & 3] = src[2][0] >> rsh;
  }
}
9232
9233 */