cdef71738485fe1de33ae9d0deff971812526741
[hashcat.git] / OpenCL / common.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
/**
 * Three-way comparison of a computed digest against a stored one.
 *
 * Words are compared from d1[3] down to d1[0]; the stored digest is read
 * through the DGST_R3 .. DGST_R0 index macros.  The first differing word
 * decides the result.
 *
 * Returns 1 if d1 is greater, -1 if d1 is smaller, 0 if all four words match.
 */
static int hash_comp (const u32 d1[4], __global u32 *d2)
{
  const u32 lhs3 = d1[3];
  const u32 rhs3 = d2[DGST_R3];

  if (lhs3 != rhs3) return ((lhs3 > rhs3) ? 1 : -1);

  const u32 lhs2 = d1[2];
  const u32 rhs2 = d2[DGST_R2];

  if (lhs2 != rhs2) return ((lhs2 > rhs2) ? 1 : -1);

  const u32 lhs1 = d1[1];
  const u32 rhs1 = d2[DGST_R1];

  if (lhs1 != rhs1) return ((lhs1 > rhs1) ? 1 : -1);

  const u32 lhs0 = d1[0];
  const u32 rhs0 = d2[DGST_R0];

  if (lhs0 != rhs0) return ((lhs0 > rhs0) ? 1 : -1);

  return (0);
}
19
20 static int find_hash (const u32 digest[4], const u32 digests_cnt, __global digest_t *digests_buf)
21 {
22 for (u32 l = 0, r = digests_cnt; r; r >>= 1)
23 {
24 const u32 m = r >> 1;
25
26 const u32 c = l + m;
27
28 const int cmp = hash_comp (digest, digests_buf[c].digest_buf);
29
30 if (cmp > 0)
31 {
32 l += m + 1;
33
34 r--;
35 }
36
37 if (cmp == 0) return (c);
38 }
39
40 return (-1);
41 }
42
/**
 * Test one bit of a bitmap selected by a digest word.
 *
 * The word index is (digest >> bitmap_shift) & bitmap_mask and the bit index
 * is the low five bits of digest.
 *
 * Returns the selected bit still in its position inside the word (non-zero if
 * set, 0 if clear) - not a normalized 0/1.
 *
 * The mask is built from an unsigned one so that bit index 31 never shifts a
 * signed literal into its sign bit.
 */
static u32 check_bitmap (__global u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest)
{
  const u32 word = bitmap[(digest >> bitmap_shift) & bitmap_mask];
  const u32 bit  = (u32) 1 << (digest & 0x1f);

  return (word & bit);
}
47
48 static u32 check (const u32 digest[2], __global u32 *bitmap_s1_a, __global u32 *bitmap_s1_b, __global u32 *bitmap_s1_c, __global u32 *bitmap_s1_d, __global u32 *bitmap_s2_a, __global u32 *bitmap_s2_b, __global u32 *bitmap_s2_c, __global u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2)
49 {
50 if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0);
51 if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0);
52 if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0);
53 if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0);
54
55 if (check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0) return (0);
56 if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0);
57 if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0);
58 if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0);
59
60 return (1);
61 }
62
63 static void mark_hash (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos)
64 {
65 hashes_shown[hash_pos] = 1;
66
67 plains_buf[hash_pos].gidvid = (gid * 1) + 0;
68 plains_buf[hash_pos].il_pos = il_pos;
69 }
70
/**
 * Zero every byte of a 16-byte block from byte position len onwards.
 *
 * Bytes are numbered from the least significant byte of w[0] (byte 0) to the
 * most significant byte of w[3] (byte 15).  Bytes below len are kept.
 * A len of 16 or more leaves the block unchanged.
 */
static void truncate_block (u32 w[4], const u32 len)
{
  // mask keeping the low (len % 4) bytes of the word that contains byte len
  const u32 keep = ((u32) 1 << ((len & 3) * 8)) - 1;

  switch (len >> 2)
  {
    case 0:
      w[0] &= keep;
      w[1]  = 0;
      w[2]  = 0;
      w[3]  = 0;
      break;

    case 1:
      w[1] &= keep;
      w[2]  = 0;
      w[3]  = 0;
      break;

    case 2:
      w[2] &= keep;
      w[3]  = 0;
      break;

    case 3:
      w[3] &= keep;
      break;
  }
}
133
/**
 * Widen 16 input bytes to 16-bit units by inserting a zero byte after each.
 *
 * Each input word yields two output words: the low two bytes of in[k] land in
 * bits 0-7 and 16-23 of the first, the high two bytes in the same positions
 * of the second.  in[0], in[1] fill out1[0..3]; in[2], in[3] fill out2[0..3].
 *
 * Stores are issued from out2[3] down to out1[0]; with that order every
 * element of `in` is read before the out1 store at the same index, so the
 * order must be kept if a caller passes the same array as `in` and `out1`.
 *
 * The body is selected by IS_NV and/or IS_AMD / IS_GENERIC; if none of them
 * is defined the function does nothing.
 */
static void make_unicode (const u32 in[4], u32 out1[4], u32 out2[4])
{
  #if defined IS_NV
  out2[3] = __byte_perm (in[3], 0, 0x7372);
  out2[2] = __byte_perm (in[3], 0, 0x7170);
  out2[1] = __byte_perm (in[2], 0, 0x7372);
  out2[0] = __byte_perm (in[2], 0, 0x7170);
  out1[3] = __byte_perm (in[1], 0, 0x7372);
  out1[2] = __byte_perm (in[1], 0, 0x7170);
  out1[1] = __byte_perm (in[0], 0, 0x7372);
  out1[0] = __byte_perm (in[0], 0, 0x7170);
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  // upper half of a word: byte 2 -> bits 0-7, byte 3 -> bits 16-23
  // lower half of a word: byte 0 -> bits 0-7, byte 1 -> bits 16-23
  out2[3] = ((in[3] >> 16) & 0x000000FF) | ((in[3] >> 24) << 16);
  out2[2] = ( in[3]        & 0x000000FF) | ((in[3] & 0x0000FF00) << 8);
  out2[1] = ((in[2] >> 16) & 0x000000FF) | ((in[2] >> 24) << 16);
  out2[0] = ( in[2]        & 0x000000FF) | ((in[2] & 0x0000FF00) << 8);
  out1[3] = ((in[1] >> 16) & 0x000000FF) | ((in[1] >> 24) << 16);
  out1[2] = ( in[1]        & 0x000000FF) | ((in[1] & 0x0000FF00) << 8);
  out1[1] = ((in[0] >> 16) & 0x000000FF) | ((in[0] >> 24) << 16);
  out1[0] = ( in[0]        & 0x000000FF) | ((in[0] & 0x0000FF00) << 8);
  #endif
}
158
/**
 * Pack 16-bit units back into bytes: the inverse layout of make_unicode ().
 *
 * From every input word only bits 0-7 and bits 16-23 are used.  Two input
 * words are combined into one output word; in1[0..3] produce out[0], out[1]
 * and in2[0..3] produce out[2], out[3].
 *
 * The body is selected by IS_NV and/or IS_AMD / IS_GENERIC; if none of them
 * is defined the function does nothing.
 */
static void undo_unicode (const u32 in1[4], const u32 in2[4], u32 out[4])
{
  #if defined IS_NV
  out[0] = __byte_perm (in1[0], in1[1], 0x6420);
  out[1] = __byte_perm (in1[2], in1[3], 0x6420);
  out[2] = __byte_perm (in2[0], in2[1], 0x6420);
  out[3] = __byte_perm (in2[2], in2[3], 0x6420);
  #endif

  #if defined IS_AMD || defined IS_GENERIC
  out[0] = ( in1[0]        & 0x000000ff)
         | ((in1[0] >>  8) & 0x0000ff00)
         | ((in1[1] << 16) & 0x00ff0000)
         | ((in1[1] <<  8) & 0xff000000);
  out[1] = ( in1[2]        & 0x000000ff)
         | ((in1[2] >>  8) & 0x0000ff00)
         | ((in1[3] << 16) & 0x00ff0000)
         | ((in1[3] <<  8) & 0xff000000);
  out[2] = ( in2[0]        & 0x000000ff)
         | ((in2[0] >>  8) & 0x0000ff00)
         | ((in2[1] << 16) & 0x00ff0000)
         | ((in2[1] <<  8) & 0xff000000);
  out[3] = ( in2[2]        & 0x000000ff)
         | ((in2[2] >>  8) & 0x0000ff00)
         | ((in2[3] << 16) & 0x00ff0000)
         | ((in2[3] <<  8) & 0xff000000);
  #endif
}
179
/**
 * Place the byte 0x01 at byte position `offset` of a 16-byte buffer.
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x01; otherwise the byte is OR-ed
 * into the word.  Offsets of 16 or more leave the buffer untouched.
 */
static void append_0x01_1x4 (u32 w0[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x01 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case 0: w0[0] = (w0[0] & keep) | mark; break;
    case 1: w0[1] = (w0[1] & keep) | mark; break;
    case 2: w0[2] = (w0[2] & keep) | mark; break;
    case 3: w0[3] = (w0[3] & keep) | mark; break;
  }
}
249
/**
 * Place the byte 0x01 at byte position `offset` of a 32-byte buffer held in
 * two 4-word arrays (w0 = bytes 0-15, w1 = bytes 16-31).
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x01; otherwise the byte is OR-ed
 * into the word.  Offsets of 32 or more leave the buffer untouched.
 */
static void append_0x01_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x01 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case 0: w0[0] = (w0[0] & keep) | mark; break;
    case 1: w0[1] = (w0[1] & keep) | mark; break;
    case 2: w0[2] = (w0[2] & keep) | mark; break;
    case 3: w0[3] = (w0[3] & keep) | mark; break;
    case 4: w1[0] = (w1[0] & keep) | mark; break;
    case 5: w1[1] = (w1[1] & keep) | mark; break;
    case 6: w1[2] = (w1[2] & keep) | mark; break;
    case 7: w1[3] = (w1[3] & keep) | mark; break;
  }
}
383
/**
 * Place the byte 0x01 at byte position `offset` of a 48-byte buffer held in
 * three 4-word arrays (w0 = bytes 0-15, w1 = bytes 16-31, w2 = bytes 32-47).
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x01; otherwise the byte is OR-ed
 * into the word.  Offsets of 48 or more leave the buffer untouched.
 */
static void append_0x01_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x01 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case  0: w0[0] = (w0[0] & keep) | mark; break;
    case  1: w0[1] = (w0[1] & keep) | mark; break;
    case  2: w0[2] = (w0[2] & keep) | mark; break;
    case  3: w0[3] = (w0[3] & keep) | mark; break;
    case  4: w1[0] = (w1[0] & keep) | mark; break;
    case  5: w1[1] = (w1[1] & keep) | mark; break;
    case  6: w1[2] = (w1[2] & keep) | mark; break;
    case  7: w1[3] = (w1[3] & keep) | mark; break;
    case  8: w2[0] = (w2[0] & keep) | mark; break;
    case  9: w2[1] = (w2[1] & keep) | mark; break;
    case 10: w2[2] = (w2[2] & keep) | mark; break;
    case 11: w2[3] = (w2[3] & keep) | mark; break;
  }
}
581
/**
 * Place the byte 0x01 at byte position `offset` of a 64-byte buffer held in
 * four 4-word arrays (w0 = bytes 0-15, w1 = 16-31, w2 = 32-47, w3 = 48-63).
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x01; otherwise the byte is OR-ed
 * into the word.  Offsets of 64 or more leave the buffer untouched.
 */
static void append_0x01_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x01 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case  0: w0[0] = (w0[0] & keep) | mark; break;
    case  1: w0[1] = (w0[1] & keep) | mark; break;
    case  2: w0[2] = (w0[2] & keep) | mark; break;
    case  3: w0[3] = (w0[3] & keep) | mark; break;
    case  4: w1[0] = (w1[0] & keep) | mark; break;
    case  5: w1[1] = (w1[1] & keep) | mark; break;
    case  6: w1[2] = (w1[2] & keep) | mark; break;
    case  7: w1[3] = (w1[3] & keep) | mark; break;
    case  8: w2[0] = (w2[0] & keep) | mark; break;
    case  9: w2[1] = (w2[1] & keep) | mark; break;
    case 10: w2[2] = (w2[2] & keep) | mark; break;
    case 11: w2[3] = (w2[3] & keep) | mark; break;
    case 12: w3[0] = (w3[0] & keep) | mark; break;
    case 13: w3[1] = (w3[1] & keep) | mark; break;
    case 14: w3[2] = (w3[2] & keep) | mark; break;
    case 15: w3[3] = (w3[3] & keep) | mark; break;
  }
}
843
/**
 * Place the byte 0x01 at byte position `offset` of a 128-byte buffer held in
 * eight 4-word arrays (w0 = bytes 0-15, w1 = 16-31, ... w7 = 112-127).
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x01; otherwise the byte is OR-ed
 * into the word.  Offsets of 128 or more leave the buffer untouched.
 */
static void append_0x01_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x01 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case  0: w0[0] = (w0[0] & keep) | mark; break;
    case  1: w0[1] = (w0[1] & keep) | mark; break;
    case  2: w0[2] = (w0[2] & keep) | mark; break;
    case  3: w0[3] = (w0[3] & keep) | mark; break;
    case  4: w1[0] = (w1[0] & keep) | mark; break;
    case  5: w1[1] = (w1[1] & keep) | mark; break;
    case  6: w1[2] = (w1[2] & keep) | mark; break;
    case  7: w1[3] = (w1[3] & keep) | mark; break;
    case  8: w2[0] = (w2[0] & keep) | mark; break;
    case  9: w2[1] = (w2[1] & keep) | mark; break;
    case 10: w2[2] = (w2[2] & keep) | mark; break;
    case 11: w2[3] = (w2[3] & keep) | mark; break;
    case 12: w3[0] = (w3[0] & keep) | mark; break;
    case 13: w3[1] = (w3[1] & keep) | mark; break;
    case 14: w3[2] = (w3[2] & keep) | mark; break;
    case 15: w3[3] = (w3[3] & keep) | mark; break;
    case 16: w4[0] = (w4[0] & keep) | mark; break;
    case 17: w4[1] = (w4[1] & keep) | mark; break;
    case 18: w4[2] = (w4[2] & keep) | mark; break;
    case 19: w4[3] = (w4[3] & keep) | mark; break;
    case 20: w5[0] = (w5[0] & keep) | mark; break;
    case 21: w5[1] = (w5[1] & keep) | mark; break;
    case 22: w5[2] = (w5[2] & keep) | mark; break;
    case 23: w5[3] = (w5[3] & keep) | mark; break;
    case 24: w6[0] = (w6[0] & keep) | mark; break;
    case 25: w6[1] = (w6[1] & keep) | mark; break;
    case 26: w6[2] = (w6[2] & keep) | mark; break;
    case 27: w6[3] = (w6[3] & keep) | mark; break;
    case 28: w7[0] = (w7[0] & keep) | mark; break;
    case 29: w7[1] = (w7[1] & keep) | mark; break;
    case 30: w7[2] = (w7[2] & keep) | mark; break;
    case 31: w7[3] = (w7[3] & keep) | mark; break;
  }
}
1361
/**
 * Place the byte 0x02 at byte position `offset` of a 16-byte buffer.
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x02; otherwise the byte is OR-ed
 * into the word.  Offsets of 16 or more leave the buffer untouched.
 */
static void append_0x02_1x4 (u32 w0[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x02 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case 0: w0[0] = (w0[0] & keep) | mark; break;
    case 1: w0[1] = (w0[1] & keep) | mark; break;
    case 2: w0[2] = (w0[2] & keep) | mark; break;
    case 3: w0[3] = (w0[3] & keep) | mark; break;
  }
}
1431
/**
 * Place the byte 0x02 at byte position `offset` of a 32-byte buffer held in
 * two 4-word arrays (w0 = bytes 0-15, w1 = bytes 16-31).
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x02; otherwise the byte is OR-ed
 * into the word.  Offsets of 32 or more leave the buffer untouched.
 */
static void append_0x02_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x02 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case 0: w0[0] = (w0[0] & keep) | mark; break;
    case 1: w0[1] = (w0[1] & keep) | mark; break;
    case 2: w0[2] = (w0[2] & keep) | mark; break;
    case 3: w0[3] = (w0[3] & keep) | mark; break;
    case 4: w1[0] = (w1[0] & keep) | mark; break;
    case 5: w1[1] = (w1[1] & keep) | mark; break;
    case 6: w1[2] = (w1[2] & keep) | mark; break;
    case 7: w1[3] = (w1[3] & keep) | mark; break;
  }
}
1565
/**
 * Place the byte 0x02 at byte position `offset` of a 48-byte buffer held in
 * three 4-word arrays (w0 = bytes 0-15, w1 = bytes 16-31, w2 = bytes 32-47).
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x02; otherwise the byte is OR-ed
 * into the word.  Offsets of 48 or more leave the buffer untouched.
 */
static void append_0x02_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x02 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case  0: w0[0] = (w0[0] & keep) | mark; break;
    case  1: w0[1] = (w0[1] & keep) | mark; break;
    case  2: w0[2] = (w0[2] & keep) | mark; break;
    case  3: w0[3] = (w0[3] & keep) | mark; break;
    case  4: w1[0] = (w1[0] & keep) | mark; break;
    case  5: w1[1] = (w1[1] & keep) | mark; break;
    case  6: w1[2] = (w1[2] & keep) | mark; break;
    case  7: w1[3] = (w1[3] & keep) | mark; break;
    case  8: w2[0] = (w2[0] & keep) | mark; break;
    case  9: w2[1] = (w2[1] & keep) | mark; break;
    case 10: w2[2] = (w2[2] & keep) | mark; break;
    case 11: w2[3] = (w2[3] & keep) | mark; break;
  }
}
1763
/**
 * Place the byte 0x02 at byte position `offset` of a 64-byte buffer held in
 * four 4-word arrays (w0 = bytes 0-15, w1 = 16-31, w2 = 32-47, w3 = 48-63).
 *
 * Byte k of a word occupies bits 8k .. 8k+7.  When offset is a multiple of 4
 * the addressed word is overwritten with 0x02; otherwise the byte is OR-ed
 * into the word.  Offsets of 64 or more leave the buffer untouched.
 */
static void append_0x02_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;
  const u32 mark = (u32) 0x02 << (lane * 8);
  const u32 keep = (lane == 0) ? 0 : 0xffffffff; // word is replaced when the byte starts it

  switch (offset >> 2)
  {
    case  0: w0[0] = (w0[0] & keep) | mark; break;
    case  1: w0[1] = (w0[1] & keep) | mark; break;
    case  2: w0[2] = (w0[2] & keep) | mark; break;
    case  3: w0[3] = (w0[3] & keep) | mark; break;
    case  4: w1[0] = (w1[0] & keep) | mark; break;
    case  5: w1[1] = (w1[1] & keep) | mark; break;
    case  6: w1[2] = (w1[2] & keep) | mark; break;
    case  7: w1[3] = (w1[3] & keep) | mark; break;
    case  8: w2[0] = (w2[0] & keep) | mark; break;
    case  9: w2[1] = (w2[1] & keep) | mark; break;
    case 10: w2[2] = (w2[2] & keep) | mark; break;
    case 11: w2[3] = (w2[3] & keep) | mark; break;
    case 12: w3[0] = (w3[0] & keep) | mark; break;
    case 13: w3[1] = (w3[1] & keep) | mark; break;
    case 14: w3[2] = (w3[2] & keep) | mark; break;
    case 15: w3[3] = (w3[3] & keep) | mark; break;
  }
}
2025
/**
 * Place the marker byte 0x02 at byte position `offset` of the 128-byte
 * buffer formed by w0..w7 (four 32-bit words per array, in that order).
 *
 * Byte k of a word occupies bits 8k..8k+7, so the target word is
 * `offset / 4` and the byte lane inside it is `offset & 3`.
 *
 * - lane 0:    the whole target word is overwritten with 0x02
 *              (its three upper bytes end up zero).
 * - lane 1..3: 0x02 is OR-ed into that lane; the other bytes are kept.
 * - offset >= 128: nothing is written.
 *
 * All array accesses use constant indices; only the case selection
 * depends on `offset`.
 */
static void append_0x02_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;

  /* marker shifted into the addressed byte lane (unsigned shift) */
  const u32 mark = (u32) 0x02 << (lane * 8);

  switch (offset / 4)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;
    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;
    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;
    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;
    case 16: w4[0] = lane ? (w4[0] | mark) : mark; break;
    case 17: w4[1] = lane ? (w4[1] | mark) : mark; break;
    case 18: w4[2] = lane ? (w4[2] | mark) : mark; break;
    case 19: w4[3] = lane ? (w4[3] | mark) : mark; break;
    case 20: w5[0] = lane ? (w5[0] | mark) : mark; break;
    case 21: w5[1] = lane ? (w5[1] | mark) : mark; break;
    case 22: w5[2] = lane ? (w5[2] | mark) : mark; break;
    case 23: w5[3] = lane ? (w5[3] | mark) : mark; break;
    case 24: w6[0] = lane ? (w6[0] | mark) : mark; break;
    case 25: w6[1] = lane ? (w6[1] | mark) : mark; break;
    case 26: w6[2] = lane ? (w6[2] | mark) : mark; break;
    case 27: w6[3] = lane ? (w6[3] | mark) : mark; break;
    case 28: w7[0] = lane ? (w7[0] | mark) : mark; break;
    case 29: w7[1] = lane ? (w7[1] | mark) : mark; break;
    case 30: w7[2] = lane ? (w7[2] | mark) : mark; break;
    case 31: w7[3] = lane ? (w7[3] | mark) : mark; break;
  }
}
2543
/**
 * Place the marker byte 0x80 at byte position `offset` of the 16-byte
 * buffer w0 (four 32-bit words).
 *
 * Byte k of a word occupies bits 8k..8k+7, so the target word is
 * `offset / 4` and the byte lane inside it is `offset & 3`.
 *
 * - lane 0:    the whole target word is overwritten with 0x80
 *              (its three upper bytes end up zero).
 * - lane 1..3: 0x80 is OR-ed into that lane; the other bytes are kept.
 * - offset >= 16: nothing is written.
 *
 * NOTE(review): 0x80 is presumably the hash-padding marker - confirm
 * against the callers.
 */
static void append_0x80_1x4 (u32 w0[4], const u32 offset)
{
  const u32 lane = offset & 3;

  /* marker shifted into the addressed byte lane (unsigned shift) */
  const u32 mark = (u32) 0x80 << (lane * 8);

  switch (offset / 4)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;
  }
}
2613
/**
 * Place the marker byte 0x80 at byte position `offset` of the 32-byte
 * buffer formed by w0 and w1 (four 32-bit words per array, in that order).
 *
 * Byte k of a word occupies bits 8k..8k+7, so the target word is
 * `offset / 4` and the byte lane inside it is `offset & 3`.
 *
 * - lane 0:    the whole target word is overwritten with 0x80
 *              (its three upper bytes end up zero).
 * - lane 1..3: 0x80 is OR-ed into that lane; the other bytes are kept.
 * - offset >= 32: nothing is written.
 *
 * NOTE(review): 0x80 is presumably the hash-padding marker - confirm
 * against the callers.
 */
static void append_0x80_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
  const u32 lane = offset & 3;

  /* marker shifted into the addressed byte lane (unsigned shift) */
  const u32 mark = (u32) 0x80 << (lane * 8);

  switch (offset / 4)
  {
    case 0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case 1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case 2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case 3: w0[3] = lane ? (w0[3] | mark) : mark; break;
    case 4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case 5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case 6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case 7: w1[3] = lane ? (w1[3] | mark) : mark; break;
  }
}
2747
/**
 * Place the marker byte 0x80 at byte position `offset` of the 48-byte
 * buffer formed by w0..w2 (four 32-bit words per array, in that order).
 *
 * Byte k of a word occupies bits 8k..8k+7, so the target word is
 * `offset / 4` and the byte lane inside it is `offset & 3`.
 *
 * - lane 0:    the whole target word is overwritten with 0x80
 *              (its three upper bytes end up zero).
 * - lane 1..3: 0x80 is OR-ed into that lane; the other bytes are kept.
 * - offset >= 48: nothing is written.
 *
 * NOTE(review): 0x80 is presumably the hash-padding marker - confirm
 * against the callers.
 */
static void append_0x80_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  const u32 lane = offset & 3;

  /* marker shifted into the addressed byte lane (unsigned shift) */
  const u32 mark = (u32) 0x80 << (lane * 8);

  switch (offset / 4)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;
    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;
    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;
  }
}
2945
/**
 * Place the marker byte 0x80 at byte position `offset` of the 64-byte
 * buffer formed by w0..w3 (four 32-bit words per array, in that order).
 *
 * Byte k of a word occupies bits 8k..8k+7, so the target word is
 * `offset / 4` and the byte lane inside it is `offset & 3`.
 *
 * - lane 0:    the whole target word is overwritten with 0x80
 *              (its three upper bytes end up zero).
 * - lane 1..3: 0x80 is OR-ed into that lane; the other bytes are kept.
 * - offset >= 64: nothing is written.
 *
 * NOTE(review): 0x80 is presumably the hash-padding marker - confirm
 * against the callers.
 */
static void append_0x80_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  const u32 lane = offset & 3;

  /* marker shifted into the addressed byte lane (unsigned shift) */
  const u32 mark = (u32) 0x80 << (lane * 8);

  switch (offset / 4)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;
    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;
    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;
    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;
  }
}
3207
/**
 * Place the marker byte 0x80 at byte position `offset` of the 128-byte
 * buffer formed by w0..w7 (four 32-bit words per array, in that order).
 *
 * Byte k of a word occupies bits 8k..8k+7, so the target word is
 * `offset / 4` and the byte lane inside it is `offset & 3`.
 *
 * - lane 0:    the whole target word is overwritten with 0x80
 *              (its three upper bytes end up zero).
 * - lane 1..3: 0x80 is OR-ed into that lane; the other bytes are kept.
 * - offset >= 128: nothing is written.
 *
 * NOTE(review): 0x80 is presumably the hash-padding marker - confirm
 * against the callers.
 */
static void append_0x80_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  const u32 lane = offset & 3;

  /* marker shifted into the addressed byte lane (unsigned shift) */
  const u32 mark = (u32) 0x80 << (lane * 8);

  switch (offset / 4)
  {
    case  0: w0[0] = lane ? (w0[0] | mark) : mark; break;
    case  1: w0[1] = lane ? (w0[1] | mark) : mark; break;
    case  2: w0[2] = lane ? (w0[2] | mark) : mark; break;
    case  3: w0[3] = lane ? (w0[3] | mark) : mark; break;
    case  4: w1[0] = lane ? (w1[0] | mark) : mark; break;
    case  5: w1[1] = lane ? (w1[1] | mark) : mark; break;
    case  6: w1[2] = lane ? (w1[2] | mark) : mark; break;
    case  7: w1[3] = lane ? (w1[3] | mark) : mark; break;
    case  8: w2[0] = lane ? (w2[0] | mark) : mark; break;
    case  9: w2[1] = lane ? (w2[1] | mark) : mark; break;
    case 10: w2[2] = lane ? (w2[2] | mark) : mark; break;
    case 11: w2[3] = lane ? (w2[3] | mark) : mark; break;
    case 12: w3[0] = lane ? (w3[0] | mark) : mark; break;
    case 13: w3[1] = lane ? (w3[1] | mark) : mark; break;
    case 14: w3[2] = lane ? (w3[2] | mark) : mark; break;
    case 15: w3[3] = lane ? (w3[3] | mark) : mark; break;
    case 16: w4[0] = lane ? (w4[0] | mark) : mark; break;
    case 17: w4[1] = lane ? (w4[1] | mark) : mark; break;
    case 18: w4[2] = lane ? (w4[2] | mark) : mark; break;
    case 19: w4[3] = lane ? (w4[3] | mark) : mark; break;
    case 20: w5[0] = lane ? (w5[0] | mark) : mark; break;
    case 21: w5[1] = lane ? (w5[1] | mark) : mark; break;
    case 22: w5[2] = lane ? (w5[2] | mark) : mark; break;
    case 23: w5[3] = lane ? (w5[3] | mark) : mark; break;
    case 24: w6[0] = lane ? (w6[0] | mark) : mark; break;
    case 25: w6[1] = lane ? (w6[1] | mark) : mark; break;
    case 26: w6[2] = lane ? (w6[2] | mark) : mark; break;
    case 27: w6[3] = lane ? (w6[3] | mark) : mark; break;
    case 28: w7[0] = lane ? (w7[0] | mark) : mark; break;
    case 29: w7[1] = lane ? (w7[1] | mark) : mark; break;
    case 30: w7[2] = lane ? (w7[2] | mark) : mark; break;
    case 31: w7[3] = lane ? (w7[3] | mark) : mark; break;
  }
}
3725
/**
 * Place the marker byte 0x80 at byte position `offset` of the 64-byte
 * buffer w (sixteen 32-bit words).
 *
 * Byte k of a word occupies bits 8k..8k+7, so the target word is
 * `offset / 4` and the byte lane inside it is `offset & 3`.
 *
 * - lane 0:    the whole target word is overwritten with 0x80
 *              (its three upper bytes end up zero).
 * - lane 1..3: 0x80 is OR-ed into that lane; the other bytes are kept.
 * - offset >= 64: nothing is written.
 *
 * NOTE(review): 0x80 is presumably the hash-padding marker - confirm
 * against the callers.
 */
static void append_0x80_1x16 (u32 w[16], const u32 offset)
{
  const u32 lane = offset & 3;

  /* marker shifted into the addressed byte lane (unsigned shift) */
  const u32 mark = (u32) 0x80 << (lane * 8);

  switch (offset / 4)
  {
    case  0: w[ 0] = lane ? (w[ 0] | mark) : mark; break;
    case  1: w[ 1] = lane ? (w[ 1] | mark) : mark; break;
    case  2: w[ 2] = lane ? (w[ 2] | mark) : mark; break;
    case  3: w[ 3] = lane ? (w[ 3] | mark) : mark; break;
    case  4: w[ 4] = lane ? (w[ 4] | mark) : mark; break;
    case  5: w[ 5] = lane ? (w[ 5] | mark) : mark; break;
    case  6: w[ 6] = lane ? (w[ 6] | mark) : mark; break;
    case  7: w[ 7] = lane ? (w[ 7] | mark) : mark; break;
    case  8: w[ 8] = lane ? (w[ 8] | mark) : mark; break;
    case  9: w[ 9] = lane ? (w[ 9] | mark) : mark; break;
    case 10: w[10] = lane ? (w[10] | mark) : mark; break;
    case 11: w[11] = lane ? (w[11] | mark) : mark; break;
    case 12: w[12] = lane ? (w[12] | mark) : mark; break;
    case 13: w[13] = lane ? (w[13] | mark) : mark; break;
    case 14: w[14] = lane ? (w[14] | mark) : mark; break;
    case 15: w[15] = lane ? (w[15] | mark) : mark; break;
  }
}
3987
3988 static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
3989 {
3990 #if defined IS_AMD || defined IS_GENERIC
3991 const int offset_mod_4 = offset & 3;
3992
3993 const int offset_minus_4 = 4 - offset;
3994
3995 switch (offset / 4)
3996 {
3997 case 0:
3998 w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
3999 w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
4000 w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
4001 w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
4002 w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
4003 w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
4004 w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
4005 w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
4006 w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4007 w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4008 w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4009 w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4010 w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4011 w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4012 w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
4013
4014 if (offset_mod_4 == 0)
4015 {
4016 w0[0] = w0[1];
4017 w0[1] = w0[2];
4018 w0[2] = w0[3];
4019 w0[3] = w1[0];
4020 w1[0] = w1[1];
4021 w1[1] = w1[2];
4022 w1[2] = w1[3];
4023 w1[3] = w2[0];
4024 w2[0] = w2[1];
4025 w2[1] = w2[2];
4026 w2[2] = w2[3];
4027 w2[3] = w3[0];
4028 w3[0] = w3[1];
4029 w3[1] = w3[2];
4030 w3[2] = 0;
4031 }
4032
4033 break;
4034
4035 case 1:
4036 w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
4037 w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
4038 w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
4039 w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
4040 w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
4041 w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
4042 w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
4043 w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4044 w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4045 w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4046 w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4047 w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4048 w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4049 w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
4050 w0[0] = 0;
4051
4052 if (offset_mod_4 == 0)
4053 {
4054 w0[1] = w0[2];
4055 w0[2] = w0[3];
4056 w0[3] = w1[0];
4057 w1[0] = w1[1];
4058 w1[1] = w1[2];
4059 w1[2] = w1[3];
4060 w1[3] = w2[0];
4061 w2[0] = w2[1];
4062 w2[1] = w2[2];
4063 w2[2] = w2[3];
4064 w2[3] = w3[0];
4065 w3[0] = w3[1];
4066 w3[1] = w3[2];
4067 w3[2] = 0;
4068 }
4069
4070 break;
4071
4072 case 2:
4073 w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
4074 w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
4075 w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
4076 w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
4077 w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
4078 w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
4079 w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4080 w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4081 w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4082 w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4083 w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4084 w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4085 w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
4086 w0[1] = 0;
4087 w0[0] = 0;
4088
4089 if (offset_mod_4 == 0)
4090 {
4091 w0[2] = w0[3];
4092 w0[3] = w1[0];
4093 w1[0] = w1[1];
4094 w1[1] = w1[2];
4095 w1[2] = w1[3];
4096 w1[3] = w2[0];
4097 w2[0] = w2[1];
4098 w2[1] = w2[2];
4099 w2[2] = w2[3];
4100 w2[3] = w3[0];
4101 w3[0] = w3[1];
4102 w3[1] = w3[2];
4103 w3[2] = 0;
4104 }
4105
4106 break;
4107
4108 case 3:
4109 w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
4110 w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
4111 w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
4112 w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
4113 w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
4114 w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4115 w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4116 w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4117 w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4118 w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4119 w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4120 w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
4121 w0[2] = 0;
4122 w0[1] = 0;
4123 w0[0] = 0;
4124
4125 if (offset_mod_4 == 0)
4126 {
4127 w0[3] = w1[0];
4128 w1[0] = w1[1];
4129 w1[1] = w1[2];
4130 w1[2] = w1[3];
4131 w1[3] = w2[0];
4132 w2[0] = w2[1];
4133 w2[1] = w2[2];
4134 w2[2] = w2[3];
4135 w2[3] = w3[0];
4136 w3[0] = w3[1];
4137 w3[1] = w3[2];
4138 w3[2] = 0;
4139 }
4140
4141 break;
4142
4143 case 4:
4144 w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
4145 w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
4146 w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
4147 w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
4148 w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4149 w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4150 w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4151 w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4152 w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4153 w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4154 w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
4155 w0[3] = 0;
4156 w0[2] = 0;
4157 w0[1] = 0;
4158 w0[0] = 0;
4159
4160 if (offset_mod_4 == 0)
4161 {
4162 w1[0] = w1[1];
4163 w1[1] = w1[2];
4164 w1[2] = w1[3];
4165 w1[3] = w2[0];
4166 w2[0] = w2[1];
4167 w2[1] = w2[2];
4168 w2[2] = w2[3];
4169 w2[3] = w3[0];
4170 w3[0] = w3[1];
4171 w3[1] = w3[2];
4172 w3[2] = 0;
4173 }
4174
4175 break;
4176
4177 case 5:
4178 w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
4179 w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
4180 w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
4181 w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4182 w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4183 w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4184 w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4185 w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4186 w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4187 w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
4188 w1[0] = 0;
4189 w0[3] = 0;
4190 w0[2] = 0;
4191 w0[1] = 0;
4192 w0[0] = 0;
4193
4194 if (offset_mod_4 == 0)
4195 {
4196 w1[1] = w1[2];
4197 w1[2] = w1[3];
4198 w1[3] = w2[0];
4199 w2[0] = w2[1];
4200 w2[1] = w2[2];
4201 w2[2] = w2[3];
4202 w2[3] = w3[0];
4203 w3[0] = w3[1];
4204 w3[1] = w3[2];
4205 w3[2] = 0;
4206 }
4207
4208 break;
4209
4210 case 6:
4211 w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
4212 w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
4213 w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4214 w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4215 w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4216 w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4217 w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4218 w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4219 w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
4220 w1[1] = 0;
4221 w1[0] = 0;
4222 w0[3] = 0;
4223 w0[2] = 0;
4224 w0[1] = 0;
4225 w0[0] = 0;
4226
4227 if (offset_mod_4 == 0)
4228 {
4229 w1[2] = w1[3];
4230 w1[3] = w2[0];
4231 w2[0] = w2[1];
4232 w2[1] = w2[2];
4233 w2[2] = w2[3];
4234 w2[3] = w3[0];
4235 w3[0] = w3[1];
4236 w3[1] = w3[2];
4237 w3[2] = 0;
4238 }
4239
4240 break;
4241
4242 case 7:
4243 w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
4244 w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
4245 w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4246 w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4247 w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4248 w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4249 w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4250 w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
4251 w1[2] = 0;
4252 w1[1] = 0;
4253 w1[0] = 0;
4254 w0[3] = 0;
4255 w0[2] = 0;
4256 w0[1] = 0;
4257 w0[0] = 0;
4258
4259 if (offset_mod_4 == 0)
4260 {
4261 w1[3] = w2[0];
4262 w2[0] = w2[1];
4263 w2[1] = w2[2];
4264 w2[2] = w2[3];
4265 w2[3] = w3[0];
4266 w3[0] = w3[1];
4267 w3[1] = w3[2];
4268 w3[2] = 0;
4269 }
4270
4271 break;
4272
4273 case 8:
4274 w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
4275 w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
4276 w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4277 w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4278 w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4279 w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4280 w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
4281 w1[3] = 0;
4282 w1[2] = 0;
4283 w1[1] = 0;
4284 w1[0] = 0;
4285 w0[3] = 0;
4286 w0[2] = 0;
4287 w0[1] = 0;
4288 w0[0] = 0;
4289
4290 if (offset_mod_4 == 0)
4291 {
4292 w2[0] = w2[1];
4293 w2[1] = w2[2];
4294 w2[2] = w2[3];
4295 w2[3] = w3[0];
4296 w3[0] = w3[1];
4297 w3[1] = w3[2];
4298 w3[2] = 0;
4299 }
4300
4301 break;
4302
4303 case 9:
4304 w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
4305 w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
4306 w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4307 w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4308 w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4309 w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
4310 w2[0] = 0;
4311 w1[3] = 0;
4312 w1[2] = 0;
4313 w1[1] = 0;
4314 w1[0] = 0;
4315 w0[3] = 0;
4316 w0[2] = 0;
4317 w0[1] = 0;
4318 w0[0] = 0;
4319
4320 if (offset_mod_4 == 0)
4321 {
4322 w2[1] = w2[2];
4323 w2[2] = w2[3];
4324 w2[3] = w3[0];
4325 w3[0] = w3[1];
4326 w3[1] = w3[2];
4327 w3[2] = 0;
4328 }
4329
4330 break;
4331
4332 case 10:
4333 w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
4334 w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
4335 w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4336 w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4337 w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
4338 w2[1] = 0;
4339 w2[0] = 0;
4340 w1[3] = 0;
4341 w1[2] = 0;
4342 w1[1] = 0;
4343 w1[0] = 0;
4344 w0[3] = 0;
4345 w0[2] = 0;
4346 w0[1] = 0;
4347 w0[0] = 0;
4348
4349 if (offset_mod_4 == 0)
4350 {
4351 w2[2] = w2[3];
4352 w2[3] = w3[0];
4353 w3[0] = w3[1];
4354 w3[1] = w3[2];
4355 w3[2] = 0;
4356 }
4357
4358 break;
4359
4360 case 11:
4361 w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
4362 w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
4363 w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4364 w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
4365 w2[2] = 0;
4366 w2[1] = 0;
4367 w2[0] = 0;
4368 w1[3] = 0;
4369 w1[2] = 0;
4370 w1[1] = 0;
4371 w1[0] = 0;
4372 w0[3] = 0;
4373 w0[2] = 0;
4374 w0[1] = 0;
4375 w0[0] = 0;
4376
4377 if (offset_mod_4 == 0)
4378 {
4379 w2[3] = w3[0];
4380 w3[0] = w3[1];
4381 w3[1] = w3[2];
4382 w3[2] = 0;
4383 }
4384
4385 break;
4386
4387 case 12:
4388 w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
4389 w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
4390 w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
4391 w2[3] = 0;
4392 w2[2] = 0;
4393 w2[1] = 0;
4394 w2[0] = 0;
4395 w1[3] = 0;
4396 w1[2] = 0;
4397 w1[1] = 0;
4398 w1[0] = 0;
4399 w0[3] = 0;
4400 w0[2] = 0;
4401 w0[1] = 0;
4402 w0[0] = 0;
4403
4404 if (offset_mod_4 == 0)
4405 {
4406 w3[0] = w3[1];
4407 w3[1] = w3[2];
4408 w3[2] = 0;
4409 }
4410
4411 break;
4412
4413 case 13:
4414 w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
4415 w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
4416 w3[0] = 0;
4417 w2[3] = 0;
4418 w2[2] = 0;
4419 w2[1] = 0;
4420 w2[0] = 0;
4421 w1[3] = 0;
4422 w1[2] = 0;
4423 w1[1] = 0;
4424 w1[0] = 0;
4425 w0[3] = 0;
4426 w0[2] = 0;
4427 w0[1] = 0;
4428 w0[0] = 0;
4429
4430 if (offset_mod_4 == 0)
4431 {
4432 w3[1] = w3[2];
4433 w3[2] = 0;
4434 }
4435
4436 break;
4437 }
4438 #endif
4439
4440 #ifdef IS_NV
4441 const int offset_minus_4 = 4 - (offset % 4);
4442
4443 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
4444
4445 switch (offset / 4)
4446 {
4447 case 0:
4448 w3[1] = __byte_perm (w3[0], w3[1], selector);
4449 w3[0] = __byte_perm (w2[3], w3[0], selector);
4450 w2[3] = __byte_perm (w2[2], w2[3], selector);
4451 w2[2] = __byte_perm (w2[1], w2[2], selector);
4452 w2[1] = __byte_perm (w2[0], w2[1], selector);
4453 w2[0] = __byte_perm (w1[3], w2[0], selector);
4454 w1[3] = __byte_perm (w1[2], w1[3], selector);
4455 w1[2] = __byte_perm (w1[1], w1[2], selector);
4456 w1[1] = __byte_perm (w1[0], w1[1], selector);
4457 w1[0] = __byte_perm (w0[3], w1[0], selector);
4458 w0[3] = __byte_perm (w0[2], w0[3], selector);
4459 w0[2] = __byte_perm (w0[1], w0[2], selector);
4460 w0[1] = __byte_perm (w0[0], w0[1], selector);
4461 w0[0] = __byte_perm ( 0, w0[0], selector);
4462
4463 break;
4464
4465 case 1:
4466 w3[1] = __byte_perm (w2[3], w3[0], selector);
4467 w3[0] = __byte_perm (w2[2], w2[3], selector);
4468 w2[3] = __byte_perm (w2[1], w2[2], selector);
4469 w2[2] = __byte_perm (w2[0], w2[1], selector);
4470 w2[1] = __byte_perm (w1[3], w2[0], selector);
4471 w2[0] = __byte_perm (w1[2], w1[3], selector);
4472 w1[3] = __byte_perm (w1[1], w1[2], selector);
4473 w1[2] = __byte_perm (w1[0], w1[1], selector);
4474 w1[1] = __byte_perm (w0[3], w1[0], selector);
4475 w1[0] = __byte_perm (w0[2], w0[3], selector);
4476 w0[3] = __byte_perm (w0[1], w0[2], selector);
4477 w0[2] = __byte_perm (w0[0], w0[1], selector);
4478 w0[1] = __byte_perm ( 0, w0[0], selector);
4479 w0[0] = 0;
4480
4481 break;
4482
4483 case 2:
4484 w3[1] = __byte_perm (w2[2], w2[3], selector);
4485 w3[0] = __byte_perm (w2[1], w2[2], selector);
4486 w2[3] = __byte_perm (w2[0], w2[1], selector);
4487 w2[2] = __byte_perm (w1[3], w2[0], selector);
4488 w2[1] = __byte_perm (w1[2], w1[3], selector);
4489 w2[0] = __byte_perm (w1[1], w1[2], selector);
4490 w1[3] = __byte_perm (w1[0], w1[1], selector);
4491 w1[2] = __byte_perm (w0[3], w1[0], selector);
4492 w1[1] = __byte_perm (w0[2], w0[3], selector);
4493 w1[0] = __byte_perm (w0[1], w0[2], selector);
4494 w0[3] = __byte_perm (w0[0], w0[1], selector);
4495 w0[2] = __byte_perm ( 0, w0[0], selector);
4496 w0[1] = 0;
4497 w0[0] = 0;
4498
4499 break;
4500
4501 case 3:
4502 w3[1] = __byte_perm (w2[1], w2[2], selector);
4503 w3[0] = __byte_perm (w2[0], w2[1], selector);
4504 w2[3] = __byte_perm (w1[3], w2[0], selector);
4505 w2[2] = __byte_perm (w1[2], w1[3], selector);
4506 w2[1] = __byte_perm (w1[1], w1[2], selector);
4507 w2[0] = __byte_perm (w1[0], w1[1], selector);
4508 w1[3] = __byte_perm (w0[3], w1[0], selector);
4509 w1[2] = __byte_perm (w0[2], w0[3], selector);
4510 w1[1] = __byte_perm (w0[1], w0[2], selector);
4511 w1[0] = __byte_perm (w0[0], w0[1], selector);
4512 w0[3] = __byte_perm ( 0, w0[0], selector);
4513 w0[2] = 0;
4514 w0[1] = 0;
4515 w0[0] = 0;
4516
4517 break;
4518
4519 case 4:
4520 w3[1] = __byte_perm (w2[0], w2[1], selector);
4521 w3[0] = __byte_perm (w1[3], w2[0], selector);
4522 w2[3] = __byte_perm (w1[2], w1[3], selector);
4523 w2[2] = __byte_perm (w1[1], w1[2], selector);
4524 w2[1] = __byte_perm (w1[0], w1[1], selector);
4525 w2[0] = __byte_perm (w0[3], w1[0], selector);
4526 w1[3] = __byte_perm (w0[2], w0[3], selector);
4527 w1[2] = __byte_perm (w0[1], w0[2], selector);
4528 w1[1] = __byte_perm (w0[0], w0[1], selector);
4529 w1[0] = __byte_perm ( 0, w0[0], selector);
4530 w0[3] = 0;
4531 w0[2] = 0;
4532 w0[1] = 0;
4533 w0[0] = 0;
4534
4535 break;
4536
4537 case 5:
4538 w3[1] = __byte_perm (w1[3], w2[0], selector);
4539 w3[0] = __byte_perm (w1[2], w1[3], selector);
4540 w2[3] = __byte_perm (w1[1], w1[2], selector);
4541 w2[2] = __byte_perm (w1[0], w1[1], selector);
4542 w2[1] = __byte_perm (w0[3], w1[0], selector);
4543 w2[0] = __byte_perm (w0[2], w0[3], selector);
4544 w1[3] = __byte_perm (w0[1], w0[2], selector);
4545 w1[2] = __byte_perm (w0[0], w0[1], selector);
4546 w1[1] = __byte_perm ( 0, w0[0], selector);
4547 w1[0] = 0;
4548 w0[3] = 0;
4549 w0[2] = 0;
4550 w0[1] = 0;
4551 w0[0] = 0;
4552
4553 break;
4554
4555 case 6:
4556 w3[1] = __byte_perm (w1[2], w1[3], selector);
4557 w3[0] = __byte_perm (w1[1], w1[2], selector);
4558 w2[3] = __byte_perm (w1[0], w1[1], selector);
4559 w2[2] = __byte_perm (w0[3], w1[0], selector);
4560 w2[1] = __byte_perm (w0[2], w0[3], selector);
4561 w2[0] = __byte_perm (w0[1], w0[2], selector);
4562 w1[3] = __byte_perm (w0[0], w0[1], selector);
4563 w1[2] = __byte_perm ( 0, w0[0], selector);
4564 w1[1] = 0;
4565 w1[0] = 0;
4566 w0[3] = 0;
4567 w0[2] = 0;
4568 w0[1] = 0;
4569 w0[0] = 0;
4570
4571 break;
4572
4573 case 7:
4574 w3[1] = __byte_perm (w1[1], w1[2], selector);
4575 w3[0] = __byte_perm (w1[0], w1[1], selector);
4576 w2[3] = __byte_perm (w0[3], w1[0], selector);
4577 w2[2] = __byte_perm (w0[2], w0[3], selector);
4578 w2[1] = __byte_perm (w0[1], w0[2], selector);
4579 w2[0] = __byte_perm (w0[0], w0[1], selector);
4580 w1[3] = __byte_perm ( 0, w0[0], selector);
4581 w1[2] = 0;
4582 w1[1] = 0;
4583 w1[0] = 0;
4584 w0[3] = 0;
4585 w0[2] = 0;
4586 w0[1] = 0;
4587 w0[0] = 0;
4588
4589 break;
4590
4591 case 8:
4592 w3[1] = __byte_perm (w1[0], w1[1], selector);
4593 w3[0] = __byte_perm (w0[3], w1[0], selector);
4594 w2[3] = __byte_perm (w0[2], w0[3], selector);
4595 w2[2] = __byte_perm (w0[1], w0[2], selector);
4596 w2[1] = __byte_perm (w0[0], w0[1], selector);
4597 w2[0] = __byte_perm ( 0, w0[0], selector);
4598 w1[3] = 0;
4599 w1[2] = 0;
4600 w1[1] = 0;
4601 w1[0] = 0;
4602 w0[3] = 0;
4603 w0[2] = 0;
4604 w0[1] = 0;
4605 w0[0] = 0;
4606
4607 break;
4608
4609 case 9:
4610 w3[1] = __byte_perm (w0[3], w1[0], selector);
4611 w3[0] = __byte_perm (w0[2], w0[3], selector);
4612 w2[3] = __byte_perm (w0[1], w0[2], selector);
4613 w2[2] = __byte_perm (w0[0], w0[1], selector);
4614 w2[1] = __byte_perm ( 0, w0[0], selector);
4615 w2[0] = 0;
4616 w1[3] = 0;
4617 w1[2] = 0;
4618 w1[1] = 0;
4619 w1[0] = 0;
4620 w0[3] = 0;
4621 w0[2] = 0;
4622 w0[1] = 0;
4623 w0[0] = 0;
4624
4625 break;
4626
4627 case 10:
4628 w3[1] = __byte_perm (w0[2], w0[3], selector);
4629 w3[0] = __byte_perm (w0[1], w0[2], selector);
4630 w2[3] = __byte_perm (w0[0], w0[1], selector);
4631 w2[2] = __byte_perm ( 0, w0[0], selector);
4632 w2[1] = 0;
4633 w2[0] = 0;
4634 w1[3] = 0;
4635 w1[2] = 0;
4636 w1[1] = 0;
4637 w1[0] = 0;
4638 w0[3] = 0;
4639 w0[2] = 0;
4640 w0[1] = 0;
4641 w0[0] = 0;
4642
4643 break;
4644
4645 case 11:
4646 w3[1] = __byte_perm (w0[1], w0[2], selector);
4647 w3[0] = __byte_perm (w0[0], w0[1], selector);
4648 w2[3] = __byte_perm ( 0, w0[0], selector);
4649 w2[2] = 0;
4650 w2[1] = 0;
4651 w2[0] = 0;
4652 w1[3] = 0;
4653 w1[2] = 0;
4654 w1[1] = 0;
4655 w1[0] = 0;
4656 w0[3] = 0;
4657 w0[2] = 0;
4658 w0[1] = 0;
4659 w0[0] = 0;
4660
4661 break;
4662
4663 case 12:
4664 w3[1] = __byte_perm (w0[0], w0[1], selector);
4665 w3[0] = __byte_perm ( 0, w0[0], selector);
4666 w2[3] = 0;
4667 w2[2] = 0;
4668 w2[1] = 0;
4669 w2[0] = 0;
4670 w1[3] = 0;
4671 w1[2] = 0;
4672 w1[1] = 0;
4673 w1[0] = 0;
4674 w0[3] = 0;
4675 w0[2] = 0;
4676 w0[1] = 0;
4677 w0[0] = 0;
4678
4679 break;
4680
4681 case 13:
4682 w3[1] = __byte_perm ( 0, w0[0], selector);
4683 w3[0] = 0;
4684 w2[3] = 0;
4685 w2[2] = 0;
4686 w2[1] = 0;
4687 w2[0] = 0;
4688 w1[3] = 0;
4689 w1[2] = 0;
4690 w1[1] = 0;
4691 w1[0] = 0;
4692 w0[3] = 0;
4693 w0[2] = 0;
4694 w0[1] = 0;
4695 w0[0] = 0;
4696
4697 break;
4698 }
4699 #endif
4700 }
4701
/**
 * Move the contents of the word buffers w0..w3 towards higher word indices
 * by `offset` bytes, in place. Words (and sub-word parts) vacated at the low
 * end are filled with zero.
 *
 * offset / 4 selects the case, i.e. by how many whole words the data moves;
 * the sub-word part of the shift is delegated to amd_bytealign () or
 * __byte_perm (), depending on which platform macro is defined.
 *
 * Only cases 0..13 exist and there is no default, so for offset >= 56 the
 * buffers are left untouched. w3[3] is never written by either path.
 *
 * NOTE(review): the "_be" suffix presumably means the words hold big-endian
 * packed bytes - confirm against the callers.
 */
4702 static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
4703 {
4704 #if defined IS_AMD || defined IS_GENERIC
// Each case assigns from the highest destination word down to the lowest,
// so every source word is read before it gets overwritten.
// This path writes up to w3[2]; its value comes from the top source word
// paired with a constant 0.
// NOTE(review): the full offset (not offset & 3) is handed to amd_bytealign ();
// presumably only its low two bits matter - confirm against the definition.
4705 switch (offset / 4)
4706 {
4707 case 0:
4708 w3[2] = amd_bytealign (w3[1], 0, offset);
4709 w3[1] = amd_bytealign (w3[0], w3[1], offset);
4710 w3[0] = amd_bytealign (w2[3], w3[0], offset);
4711 w2[3] = amd_bytealign (w2[2], w2[3], offset);
4712 w2[2] = amd_bytealign (w2[1], w2[2], offset);
4713 w2[1] = amd_bytealign (w2[0], w2[1], offset);
4714 w2[0] = amd_bytealign (w1[3], w2[0], offset);
4715 w1[3] = amd_bytealign (w1[2], w1[3], offset);
4716 w1[2] = amd_bytealign (w1[1], w1[2], offset);
4717 w1[1] = amd_bytealign (w1[0], w1[1], offset);
4718 w1[0] = amd_bytealign (w0[3], w1[0], offset);
4719 w0[3] = amd_bytealign (w0[2], w0[3], offset);
4720 w0[2] = amd_bytealign (w0[1], w0[2], offset);
4721 w0[1] = amd_bytealign (w0[0], w0[1], offset);
4722 w0[0] = amd_bytealign ( 0, w0[0], offset);
4723 break;
4724
4725 case 1:
4726 w3[2] = amd_bytealign (w3[0], 0, offset);
4727 w3[1] = amd_bytealign (w2[3], w3[0], offset);
4728 w3[0] = amd_bytealign (w2[2], w2[3], offset);
4729 w2[3] = amd_bytealign (w2[1], w2[2], offset);
4730 w2[2] = amd_bytealign (w2[0], w2[1], offset);
4731 w2[1] = amd_bytealign (w1[3], w2[0], offset);
4732 w2[0] = amd_bytealign (w1[2], w1[3], offset);
4733 w1[3] = amd_bytealign (w1[1], w1[2], offset);
4734 w1[2] = amd_bytealign (w1[0], w1[1], offset);
4735 w1[1] = amd_bytealign (w0[3], w1[0], offset);
4736 w1[0] = amd_bytealign (w0[2], w0[3], offset);
4737 w0[3] = amd_bytealign (w0[1], w0[2], offset);
4738 w0[2] = amd_bytealign (w0[0], w0[1], offset);
4739 w0[1] = amd_bytealign ( 0, w0[0], offset);
4740 w0[0] = 0;
4741 break;
4742
4743 case 2:
4744 w3[2] = amd_bytealign (w2[3], 0, offset);
4745 w3[1] = amd_bytealign (w2[2], w2[3], offset);
4746 w3[0] = amd_bytealign (w2[1], w2[2], offset);
4747 w2[3] = amd_bytealign (w2[0], w2[1], offset);
4748 w2[2] = amd_bytealign (w1[3], w2[0], offset);
4749 w2[1] = amd_bytealign (w1[2], w1[3], offset);
4750 w2[0] = amd_bytealign (w1[1], w1[2], offset);
4751 w1[3] = amd_bytealign (w1[0], w1[1], offset);
4752 w1[2] = amd_bytealign (w0[3], w1[0], offset);
4753 w1[1] = amd_bytealign (w0[2], w0[3], offset);
4754 w1[0] = amd_bytealign (w0[1], w0[2], offset);
4755 w0[3] = amd_bytealign (w0[0], w0[1], offset);
4756 w0[2] = amd_bytealign ( 0, w0[0], offset);
4757 w0[1] = 0;
4758 w0[0] = 0;
4759 break;
4760
4761 case 3:
4762 w3[2] = amd_bytealign (w2[2], 0, offset);
4763 w3[1] = amd_bytealign (w2[1], w2[2], offset);
4764 w3[0] = amd_bytealign (w2[0], w2[1], offset);
4765 w2[3] = amd_bytealign (w1[3], w2[0], offset);
4766 w2[2] = amd_bytealign (w1[2], w1[3], offset);
4767 w2[1] = amd_bytealign (w1[1], w1[2], offset);
4768 w2[0] = amd_bytealign (w1[0], w1[1], offset);
4769 w1[3] = amd_bytealign (w0[3], w1[0], offset);
4770 w1[2] = amd_bytealign (w0[2], w0[3], offset);
4771 w1[1] = amd_bytealign (w0[1], w0[2], offset);
4772 w1[0] = amd_bytealign (w0[0], w0[1], offset);
4773 w0[3] = amd_bytealign ( 0, w0[0], offset);
4774 w0[2] = 0;
4775 w0[1] = 0;
4776 w0[0] = 0;
4777 break;
4778
4779 case 4:
4780 w3[2] = amd_bytealign (w2[1], 0, offset);
4781 w3[1] = amd_bytealign (w2[0], w2[1], offset);
4782 w3[0] = amd_bytealign (w1[3], w2[0], offset);
4783 w2[3] = amd_bytealign (w1[2], w1[3], offset);
4784 w2[2] = amd_bytealign (w1[1], w1[2], offset);
4785 w2[1] = amd_bytealign (w1[0], w1[1], offset);
4786 w2[0] = amd_bytealign (w0[3], w1[0], offset);
4787 w1[3] = amd_bytealign (w0[2], w0[3], offset);
4788 w1[2] = amd_bytealign (w0[1], w0[2], offset);
4789 w1[1] = amd_bytealign (w0[0], w0[1], offset);
4790 w1[0] = amd_bytealign ( 0, w0[0], offset);
4791 w0[3] = 0;
4792 w0[2] = 0;
4793 w0[1] = 0;
4794 w0[0] = 0;
4795 break;
4796
4797 case 5:
4798 w3[2] = amd_bytealign (w2[0], 0, offset);
4799 w3[1] = amd_bytealign (w1[3], w2[0], offset);
4800 w3[0] = amd_bytealign (w1[2], w1[3], offset);
4801 w2[3] = amd_bytealign (w1[1], w1[2], offset);
4802 w2[2] = amd_bytealign (w1[0], w1[1], offset);
4803 w2[1] = amd_bytealign (w0[3], w1[0], offset);
4804 w2[0] = amd_bytealign (w0[2], w0[3], offset);
4805 w1[3] = amd_bytealign (w0[1], w0[2], offset);
4806 w1[2] = amd_bytealign (w0[0], w0[1], offset);
4807 w1[1] = amd_bytealign ( 0, w0[0], offset);
4808 w1[0] = 0;
4809 w0[3] = 0;
4810 w0[2] = 0;
4811 w0[1] = 0;
4812 w0[0] = 0;
4813 break;
4814
4815 case 6:
4816 w3[2] = amd_bytealign (w1[3], 0, offset);
4817 w3[1] = amd_bytealign (w1[2], w1[3], offset);
4818 w3[0] = amd_bytealign (w1[1], w1[2], offset);
4819 w2[3] = amd_bytealign (w1[0], w1[1], offset);
4820 w2[2] = amd_bytealign (w0[3], w1[0], offset);
4821 w2[1] = amd_bytealign (w0[2], w0[3], offset);
4822 w2[0] = amd_bytealign (w0[1], w0[2], offset);
4823 w1[3] = amd_bytealign (w0[0], w0[1], offset);
4824 w1[2] = amd_bytealign ( 0, w0[0], offset);
4825 w1[1] = 0;
4826 w1[0] = 0;
4827 w0[3] = 0;
4828 w0[2] = 0;
4829 w0[1] = 0;
4830 w0[0] = 0;
4831 break;
4832
4833 case 7:
4834 w3[2] = amd_bytealign (w1[2], 0, offset);
4835 w3[1] = amd_bytealign (w1[1], w1[2], offset);
4836 w3[0] = amd_bytealign (w1[0], w1[1], offset);
4837 w2[3] = amd_bytealign (w0[3], w1[0], offset);
4838 w2[2] = amd_bytealign (w0[2], w0[3], offset);
4839 w2[1] = amd_bytealign (w0[1], w0[2], offset);
4840 w2[0] = amd_bytealign (w0[0], w0[1], offset);
4841 w1[3] = amd_bytealign ( 0, w0[0], offset);
4842 w1[2] = 0;
4843 w1[1] = 0;
4844 w1[0] = 0;
4845 w0[3] = 0;
4846 w0[2] = 0;
4847 w0[1] = 0;
4848 w0[0] = 0;
4849 break;
4850
4851 case 8:
4852 w3[2] = amd_bytealign (w1[1], 0, offset);
4853 w3[1] = amd_bytealign (w1[0], w1[1], offset);
4854 w3[0] = amd_bytealign (w0[3], w1[0], offset);
4855 w2[3] = amd_bytealign (w0[2], w0[3], offset);
4856 w2[2] = amd_bytealign (w0[1], w0[2], offset);
4857 w2[1] = amd_bytealign (w0[0], w0[1], offset);
4858 w2[0] = amd_bytealign ( 0, w0[0], offset);
4859 w1[3] = 0;
4860 w1[2] = 0;
4861 w1[1] = 0;
4862 w1[0] = 0;
4863 w0[3] = 0;
4864 w0[2] = 0;
4865 w0[1] = 0;
4866 w0[0] = 0;
4867 break;
4868
4869 case 9:
4870 w3[2] = amd_bytealign (w1[0], 0, offset);
4871 w3[1] = amd_bytealign (w0[3], w1[0], offset);
4872 w3[0] = amd_bytealign (w0[2], w0[3], offset);
4873 w2[3] = amd_bytealign (w0[1], w0[2], offset);
4874 w2[2] = amd_bytealign (w0[0], w0[1], offset);
4875 w2[1] = amd_bytealign ( 0, w0[0], offset);
4876 w2[0] = 0;
4877 w1[3] = 0;
4878 w1[2] = 0;
4879 w1[1] = 0;
4880 w1[0] = 0;
4881 w0[3] = 0;
4882 w0[2] = 0;
4883 w0[1] = 0;
4884 w0[0] = 0;
4885 break;
4886
4887 case 10:
4888 w3[2] = amd_bytealign (w0[3], 0, offset);
4889 w3[1] = amd_bytealign (w0[2], w0[3], offset);
4890 w3[0] = amd_bytealign (w0[1], w0[2], offset);
4891 w2[3] = amd_bytealign (w0[0], w0[1], offset);
4892 w2[2] = amd_bytealign ( 0, w0[0], offset);
4893 w2[1] = 0;
4894 w2[0] = 0;
4895 w1[3] = 0;
4896 w1[2] = 0;
4897 w1[1] = 0;
4898 w1[0] = 0;
4899 w0[3] = 0;
4900 w0[2] = 0;
4901 w0[1] = 0;
4902 w0[0] = 0;
4903 break;
4904
4905 case 11:
4906 w3[2] = amd_bytealign (w0[2], 0, offset);
4907 w3[1] = amd_bytealign (w0[1], w0[2], offset);
4908 w3[0] = amd_bytealign (w0[0], w0[1], offset);
4909 w2[3] = amd_bytealign ( 0, w0[0], offset);
4910 w2[2] = 0;
4911 w2[1] = 0;
4912 w2[0] = 0;
4913 w1[3] = 0;
4914 w1[2] = 0;
4915 w1[1] = 0;
4916 w1[0] = 0;
4917 w0[3] = 0;
4918 w0[2] = 0;
4919 w0[1] = 0;
4920 w0[0] = 0;
4921 break;
4922
4923 case 12:
4924 w3[2] = amd_bytealign (w0[1], 0, offset);
4925 w3[1] = amd_bytealign (w0[0], w0[1], offset);
4926 w3[0] = amd_bytealign ( 0, w0[0], offset);
4927 w2[3] = 0;
4928 w2[2] = 0;
4929 w2[1] = 0;
4930 w2[0] = 0;
4931 w1[3] = 0;
4932 w1[2] = 0;
4933 w1[1] = 0;
4934 w1[0] = 0;
4935 w0[3] = 0;
4936 w0[2] = 0;
4937 w0[1] = 0;
4938 w0[0] = 0;
4939 break;
4940
4941 case 13:
4942 w3[2] = amd_bytealign (w0[0], 0, offset);
4943 w3[1] = amd_bytealign ( 0, w0[0], offset);
4944 w3[0] = 0;
4945 w2[3] = 0;
4946 w2[2] = 0;
4947 w2[1] = 0;
4948 w2[0] = 0;
4949 w1[3] = 0;
4950 w1[2] = 0;
4951 w1[1] = 0;
4952 w1[0] = 0;
4953 w0[3] = 0;
4954 w0[2] = 0;
4955 w0[1] = 0;
4956 w0[0] = 0;
4957 break;
4958 }
4959 #endif
4960
4961 #ifdef IS_NV
// The selector passed to __byte_perm () depends only on offset & 3:
// 0 -> 0x3210, 1 -> 0x4321, 2 -> 0x5432, 3 -> 0x6543.
// Unlike the AMD/GENERIC path above, this path writes w3[1] as its highest
// word and never touches w3[2]. The assignments again run from the highest
// destination word downwards so sources are read before being overwritten.
4962 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
4963
4964 switch (offset / 4)
4965 {
4966 case 0:
4967 w3[1] = __byte_perm (w3[1], w3[0], selector);
4968 w3[0] = __byte_perm (w3[0], w2[3], selector);
4969 w2[3] = __byte_perm (w2[3], w2[2], selector);
4970 w2[2] = __byte_perm (w2[2], w2[1], selector);
4971 w2[1] = __byte_perm (w2[1], w2[0], selector);
4972 w2[0] = __byte_perm (w2[0], w1[3], selector);
4973 w1[3] = __byte_perm (w1[3], w1[2], selector);
4974 w1[2] = __byte_perm (w1[2], w1[1], selector);
4975 w1[1] = __byte_perm (w1[1], w1[0], selector);
4976 w1[0] = __byte_perm (w1[0], w0[3], selector);
4977 w0[3] = __byte_perm (w0[3], w0[2], selector);
4978 w0[2] = __byte_perm (w0[2], w0[1], selector);
4979 w0[1] = __byte_perm (w0[1], w0[0], selector);
4980 w0[0] = __byte_perm (w0[0], 0, selector);
4981 break;
4982
4983 case 1:
4984 w3[1] = __byte_perm (w3[0], w2[3], selector);
4985 w3[0] = __byte_perm (w2[3], w2[2], selector);
4986 w2[3] = __byte_perm (w2[2], w2[1], selector);
4987 w2[2] = __byte_perm (w2[1], w2[0], selector);
4988 w2[1] = __byte_perm (w2[0], w1[3], selector);
4989 w2[0] = __byte_perm (w1[3], w1[2], selector);
4990 w1[3] = __byte_perm (w1[2], w1[1], selector);
4991 w1[2] = __byte_perm (w1[1], w1[0], selector);
4992 w1[1] = __byte_perm (w1[0], w0[3], selector);
4993 w1[0] = __byte_perm (w0[3], w0[2], selector);
4994 w0[3] = __byte_perm (w0[2], w0[1], selector);
4995 w0[2] = __byte_perm (w0[1], w0[0], selector);
4996 w0[1] = __byte_perm (w0[0], 0, selector);
4997 w0[0] = 0;
4998 break;
4999
5000 case 2:
5001 w3[1] = __byte_perm (w2[3], w2[2], selector);
5002 w3[0] = __byte_perm (w2[2], w2[1], selector);
5003 w2[3] = __byte_perm (w2[1], w2[0], selector);
5004 w2[2] = __byte_perm (w2[0], w1[3], selector);
5005 w2[1] = __byte_perm (w1[3], w1[2], selector);
5006 w2[0] = __byte_perm (w1[2], w1[1], selector);
5007 w1[3] = __byte_perm (w1[1], w1[0], selector);
5008 w1[2] = __byte_perm (w1[0], w0[3], selector);
5009 w1[1] = __byte_perm (w0[3], w0[2], selector);
5010 w1[0] = __byte_perm (w0[2], w0[1], selector);
5011 w0[3] = __byte_perm (w0[1], w0[0], selector);
5012 w0[2] = __byte_perm (w0[0], 0, selector);
5013 w0[1] = 0;
5014 w0[0] = 0;
5015 break;
5016
5017 case 3:
5018 w3[1] = __byte_perm (w2[2], w2[1], selector);
5019 w3[0] = __byte_perm (w2[1], w2[0], selector);
5020 w2[3] = __byte_perm (w2[0], w1[3], selector);
5021 w2[2] = __byte_perm (w1[3], w1[2], selector);
5022 w2[1] = __byte_perm (w1[2], w1[1], selector);
5023 w2[0] = __byte_perm (w1[1], w1[0], selector);
5024 w1[3] = __byte_perm (w1[0], w0[3], selector);
5025 w1[2] = __byte_perm (w0[3], w0[2], selector);
5026 w1[1] = __byte_perm (w0[2], w0[1], selector);
5027 w1[0] = __byte_perm (w0[1], w0[0], selector);
5028 w0[3] = __byte_perm (w0[0], 0, selector);
5029 w0[2] = 0;
5030 w0[1] = 0;
5031 w0[0] = 0;
5032 break;
5033
5034 case 4:
5035 w3[1] = __byte_perm (w2[1], w2[0], selector);
5036 w3[0] = __byte_perm (w2[0], w1[3], selector);
5037 w2[3] = __byte_perm (w1[3], w1[2], selector);
5038 w2[2] = __byte_perm (w1[2], w1[1], selector);
5039 w2[1] = __byte_perm (w1[1], w1[0], selector);
5040 w2[0] = __byte_perm (w1[0], w0[3], selector);
5041 w1[3] = __byte_perm (w0[3], w0[2], selector);
5042 w1[2] = __byte_perm (w0[2], w0[1], selector);
5043 w1[1] = __byte_perm (w0[1], w0[0], selector);
5044 w1[0] = __byte_perm (w0[0], 0, selector);
5045 w0[3] = 0;
5046 w0[2] = 0;
5047 w0[1] = 0;
5048 w0[0] = 0;
5049 break;
5050
5051 case 5:
5052 w3[1] = __byte_perm (w2[0], w1[3], selector);
5053 w3[0] = __byte_perm (w1[3], w1[2], selector);
5054 w2[3] = __byte_perm (w1[2], w1[1], selector);
5055 w2[2] = __byte_perm (w1[1], w1[0], selector);
5056 w2[1] = __byte_perm (w1[0], w0[3], selector);
5057 w2[0] = __byte_perm (w0[3], w0[2], selector);
5058 w1[3] = __byte_perm (w0[2], w0[1], selector);
5059 w1[2] = __byte_perm (w0[1], w0[0], selector);
5060 w1[1] = __byte_perm (w0[0], 0, selector);
5061 w1[0] = 0;
5062 w0[3] = 0;
5063 w0[2] = 0;
5064 w0[1] = 0;
5065 w0[0] = 0;
5066 break;
5067
5068 case 6:
5069 w3[1] = __byte_perm (w1[3], w1[2], selector);
5070 w3[0] = __byte_perm (w1[2], w1[1], selector);
5071 w2[3] = __byte_perm (w1[1], w1[0], selector);
5072 w2[2] = __byte_perm (w1[0], w0[3], selector);
5073 w2[1] = __byte_perm (w0[3], w0[2], selector);
5074 w2[0] = __byte_perm (w0[2], w0[1], selector);
5075 w1[3] = __byte_perm (w0[1], w0[0], selector);
5076 w1[2] = __byte_perm (w0[0], 0, selector);
5077 w1[1] = 0;
5078 w1[0] = 0;
5079 w0[3] = 0;
5080 w0[2] = 0;
5081 w0[1] = 0;
5082 w0[0] = 0;
5083 break;
5084
5085 case 7:
5086 w3[1] = __byte_perm (w1[2], w1[1], selector);
5087 w3[0] = __byte_perm (w1[1], w1[0], selector);
5088 w2[3] = __byte_perm (w1[0], w0[3], selector);
5089 w2[2] = __byte_perm (w0[3], w0[2], selector);
5090 w2[1] = __byte_perm (w0[2], w0[1], selector);
5091 w2[0] = __byte_perm (w0[1], w0[0], selector);
5092 w1[3] = __byte_perm (w0[0], 0, selector);
5093 w1[2] = 0;
5094 w1[1] = 0;
5095 w1[0] = 0;
5096 w0[3] = 0;
5097 w0[2] = 0;
5098 w0[1] = 0;
5099 w0[0] = 0;
5100 break;
5101
5102 case 8:
5103 w3[1] = __byte_perm (w1[1], w1[0], selector);
5104 w3[0] = __byte_perm (w1[0], w0[3], selector);
5105 w2[3] = __byte_perm (w0[3], w0[2], selector);
5106 w2[2] = __byte_perm (w0[2], w0[1], selector);
5107 w2[1] = __byte_perm (w0[1], w0[0], selector);
5108 w2[0] = __byte_perm (w0[0], 0, selector);
5109 w1[3] = 0;
5110 w1[2] = 0;
5111 w1[1] = 0;
5112 w1[0] = 0;
5113 w0[3] = 0;
5114 w0[2] = 0;
5115 w0[1] = 0;
5116 w0[0] = 0;
5117 break;
5118
5119 case 9:
5120 w3[1] = __byte_perm (w1[0], w0[3], selector);
5121 w3[0] = __byte_perm (w0[3], w0[2], selector);
5122 w2[3] = __byte_perm (w0[2], w0[1], selector);
5123 w2[2] = __byte_perm (w0[1], w0[0], selector);
5124 w2[1] = __byte_perm (w0[0], 0, selector);
5125 w2[0] = 0;
5126 w1[3] = 0;
5127 w1[2] = 0;
5128 w1[1] = 0;
5129 w1[0] = 0;
5130 w0[3] = 0;
5131 w0[2] = 0;
5132 w0[1] = 0;
5133 w0[0] = 0;
5134 break;
5135
5136 case 10:
5137 w3[1] = __byte_perm (w0[3], w0[2], selector);
5138 w3[0] = __byte_perm (w0[2], w0[1], selector);
5139 w2[3] = __byte_perm (w0[1], w0[0], selector);
5140 w2[2] = __byte_perm (w0[0], 0, selector);
5141 w2[1] = 0;
5142 w2[0] = 0;
5143 w1[3] = 0;
5144 w1[2] = 0;
5145 w1[1] = 0;
5146 w1[0] = 0;
5147 w0[3] = 0;
5148 w0[2] = 0;
5149 w0[1] = 0;
5150 w0[0] = 0;
5151 break;
5152
5153 case 11:
5154 w3[1] = __byte_perm (w0[2], w0[1], selector);
5155 w3[0] = __byte_perm (w0[1], w0[0], selector);
5156 w2[3] = __byte_perm (w0[0], 0, selector);
5157 w2[2] = 0;
5158 w2[1] = 0;
5159 w2[0] = 0;
5160 w1[3] = 0;
5161 w1[2] = 0;
5162 w1[1] = 0;
5163 w1[0] = 0;
5164 w0[3] = 0;
5165 w0[2] = 0;
5166 w0[1] = 0;
5167 w0[0] = 0;
5168 break;
5169
5170 case 12:
5171 w3[1] = __byte_perm (w0[1], w0[0], selector);
5172 w3[0] = __byte_perm (w0[0], 0, selector);
5173 w2[3] = 0;
5174 w2[2] = 0;
5175 w2[1] = 0;
5176 w2[0] = 0;
5177 w1[3] = 0;
5178 w1[2] = 0;
5179 w1[1] = 0;
5180 w1[0] = 0;
5181 w0[3] = 0;
5182 w0[2] = 0;
5183 w0[1] = 0;
5184 w0[0] = 0;
5185 break;
5186
5187 case 13:
5188 w3[1] = __byte_perm (w0[0], 0, selector);
5189 w3[0] = 0;
5190 w2[3] = 0;
5191 w2[2] = 0;
5192 w2[1] = 0;
5193 w2[0] = 0;
5194 w1[3] = 0;
5195 w1[2] = 0;
5196 w1[1] = 0;
5197 w1[0] = 0;
5198 w0[3] = 0;
5199 w0[2] = 0;
5200 w0[1] = 0;
5201 w0[0] = 0;
5202 break;
5203 }
5204 #endif
5205 }
5206
5207 /* not needed anymore?
5208
5209 // before: append_0x80_2_be
// ORs a single 0x80 byte into the eight words formed by w0[0..3] followed by
// w1[0..3]. Byte position `offset` is counted from the most significant byte
// of each word: offset 0 hits the top byte of w0[0], offset 31 the lowest
// byte of w1[3]. Offsets of 32 and above leave both arrays unchanged.
static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
{
  // 0x80 placed in the byte lane chosen by the low two bits of offset
  // (lane 0 -> 0x80000000, lane 3 -> 0x80)
  const u32 marker = 0x80000000 >> ((offset & 3) * 8);

  // the remaining bits of offset pick the word; indices stay constant
  switch (offset >> 2)
  {
    case 0: w0[0] |= marker; break;
    case 1: w0[1] |= marker; break;
    case 2: w0[2] |= marker; break;
    case 3: w0[3] |= marker; break;
    case 4: w1[0] |= marker; break;
    case 5: w1[1] |= marker; break;
    case 6: w1[2] |= marker; break;
    case 7: w1[3] |= marker; break;
  }
}
5343
5344 // before: append_0x80_8
// Places a 0x80 byte at byte position `offset` inside the 32-word array w.
// Byte positions are counted from the least significant byte of each word.
// When offset is a multiple of four the whole word is overwritten with 0x80;
// for every other offset the byte is OR-ed into the existing word.
// Offsets of 128 and above leave w unchanged.
static void append_0x80_1x32 (u32 w[32], const u32 offset)
{
  if (offset > 127) return;

  const u32 idx  = offset >> 2; // word that contains the byte
  const u32 lane = offset & 3;  // byte lane inside that word

  if (lane == 0)
  {
    // word-aligned position: plain assignment, previous content is dropped
    w[idx] = 0x80;
  }
  else
  {
    w[idx] = w[idx] | (0x80u << (lane * 8));
  }
}
5862
5863 // before: device_memcat2L
// Writes into dst0 the two-word result of placing src_r0 at byte position
// `offset` (1..7) behind the bytes of src_l0. Byte positions are counted from
// the least significant byte of each word.
//   offset 1..3: dst0[0] = src_l0[0] | shifted src_r0[0]; dst0[1] receives
//                the bytes shifted out of src_r0[0] plus shifted src_r0[1]
//   offset 4   : dst0[1] = src_r0[0]
//   offset 5..7: dst0[1] = src_l0[1] | shifted src_r0[0]
// dst0[0] is not written for offsets 4..7. Offset 0 and offsets above 7
// leave dst0 unchanged.
static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2])
{
  if ((offset == 0) || (offset > 7)) return;

  if (offset == 4)
  {
    dst0[1] = src_r0[0];

    return;
  }

  const u32 lsh = (offset & 3) * 8; // 8, 16 or 24 at this point
  const u32 rsh = 32 - lsh;

  if (offset < 4)
  {
    dst0[0] = src_l0[0] | src_r0[0] << lsh;
    dst0[1] = src_r0[0] >> rsh | src_r0[1] << lsh;
  }
  else
  {
    dst0[1] = src_l0[1] | src_r0[0] << lsh;
  }
}
5900
5901 // before: device_memcat4L
// Writes into dst0 the four-word result of placing src_r0 at byte position
// `offset` (1..15) behind the bytes of src_l0. Byte positions are counted
// from the least significant byte of each word.
//   - Words below the one containing `offset` are not written.
//   - If offset is a multiple of four, src_r0 words are copied unshifted
//     starting at word offset / 4.
//   - Otherwise the word containing `offset` becomes
//     src_l0[word] | (src_r0[0] << shift) and the following words receive
//     the shifted continuation of src_r0.
//   - Whatever does not fit into dst0[3] is dropped.
// Offset 0 and offsets above 15 leave dst0 unchanged.
static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
{
  if ((offset == 0) || (offset > 15)) return;

  const u32 first = offset >> 2;      // first destination word that is written
  const u32 lsh   = (offset & 3) * 8; // bit shift inside that word

  if (lsh == 0)
  {
    // word-aligned: straight copy, src_l0 is not consulted
    for (u32 pos = first, i = 0; pos < 4; pos++, i++)
    {
      dst0[pos] = src_r0[i];
    }

    return;
  }

  const u32 rsh = 32 - lsh;

  dst0[first] = src_l0[first] | src_r0[0] << lsh;

  for (u32 pos = first + 1, i = 1; pos < 4; pos++, i++)
  {
    dst0[pos] = src_r0[i - 1] >> rsh | src_r0[i] << lsh;
  }
}
5988
5989 // before: device_memcat8L
// Writes into the eight destination words dst0[0..3], dst1[0..3] the result
// of placing the four words of src_r0 at byte position `offset` (1..31)
// behind the bytes held in src_l0[0..3], src_l1[0..3]. Byte positions are
// counted from the least significant byte of each word.
//   - Words below the one containing `offset` are not written.
//   - If offset is a multiple of four, src_r0 words are copied unshifted.
//   - Otherwise the word containing `offset` becomes
//     src_l[word] | (src_r0[0] << shift), the next three words receive the
//     shifted continuation of src_r0 and a fifth word receives the bytes
//     shifted out of src_r0[3].
//   - Anything that would land beyond dst1[3] is dropped.
// Offset 0 and offsets above 31 leave the destination unchanged.
static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
{
  if ((offset == 0) || (offset > 31)) return;

  const u32 first = offset >> 2;      // first destination word that is written
  const u32 lsh   = (offset & 3) * 8; // bit shift inside that word
  const u32 rsh   = 32 - lsh;         // only used when lsh != 0

  // aligned copies touch four words, unaligned ones spill into a fifth
  const u32 cnt = (lsh == 0) ? 4 : 5;

  for (u32 i = 0; i < cnt; i++)
  {
    const u32 pos = first + i;

    if (pos > 7) break;

    u32 val;

    if (lsh == 0)
    {
      val = src_r0[i];
    }
    else if (i == 0)
    {
      const u32 kept = (pos < 4) ? src_l0[pos] : src_l1[pos - 4];

      val = kept | src_r0[0] << lsh;
    }
    else if (i == 4)
    {
      val = src_r0[3] >> rsh;
    }
    else
    {
      val = src_r0[i - 1] >> rsh | src_r0[i] << lsh;
    }

    if (pos < 4)
    {
      dst0[pos] = val;
    }
    else
    {
      dst1[pos - 4] = val;
    }
  }
}
6200
6201 // before: device_memcat12L
// Writes into the twelve destination words dst0[0..3], dst1[0..3],
// dst2[0..3] the result of placing the four words of src_r0 at byte position
// `offset` (1..47) behind the bytes held in src_l0, src_l1, src_l2. Byte
// positions are counted from the least significant byte of each word.
//   - Words below the one containing `offset` are not written.
//   - If offset is a multiple of four, src_r0 words are copied unshifted.
//   - Otherwise the word containing `offset` becomes
//     src_l[word] | (src_r0[0] << shift), the next three words receive the
//     shifted continuation of src_r0 and a fifth word receives the bytes
//     shifted out of src_r0[3].
//   - Anything that would land beyond dst2[3] is dropped.
// Offset 0 and offsets above 47 leave the destination unchanged.
static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
{
  if ((offset == 0) || (offset > 47)) return;

  const u32 first = offset >> 2;      // first destination word that is written
  const u32 lsh   = (offset & 3) * 8; // bit shift inside that word
  const u32 rsh   = 32 - lsh;         // only used when lsh != 0

  // aligned copies touch four words, unaligned ones spill into a fifth
  const u32 cnt = (lsh == 0) ? 4 : 5;

  for (u32 i = 0; i < cnt; i++)
  {
    const u32 pos = first + i;

    if (pos > 11) break;

    u32 val;

    if (lsh == 0)
    {
      val = src_r0[i];
    }
    else if (i == 0)
    {
      u32 kept;

      if (pos < 4)
      {
        kept = src_l0[pos];
      }
      else if (pos < 8)
      {
        kept = src_l1[pos - 4];
      }
      else
      {
        kept = src_l2[pos - 8];
      }

      val = kept | src_r0[0] << lsh;
    }
    else if (i == 4)
    {
      val = src_r0[3] >> rsh;
    }
    else
    {
      val = src_r0[i - 1] >> rsh | src_r0[i] << lsh;
    }

    if (pos < 4)
    {
      dst0[pos] = val;
    }
    else if (pos < 8)
    {
      dst1[pos - 4] = val;
    }
    else
    {
      dst2[pos - 8] = val;
    }
  }
}
6536
6537 // before: device_memcat12L
6538 static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
6539 {
6540 switch (offset)
6541 {
6542 case 0:
6543 dst0[0] = src_r0[0];
6544 dst0[1] = src_r0[1];
6545 dst0[2] = src_r0[2];
6546 dst0[3] = src_r0[3];
6547 dst1[0] = src_r1[0];
6548 dst1[1] = src_r1[1];
6549 dst1[2] = src_r1[2];
6550 dst1[3] = src_r1[3];
6551 break;
6552
6553 case 1:
6554 dst0[0] = src_l0[0] | src_r0[0] << 8;
6555 dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
6556 dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
6557 dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
6558 dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
6559 dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
6560 dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
6561 dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8;
6562 dst2[0] = src_r1[3] >> 24;
6563 break;
6564
6565 case 2:
6566 dst0[0] = src_l0[0] | src_r0[0] << 16;
6567 dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
6568 dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
6569 dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
6570 dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
6571 dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
6572 dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
6573 dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16;
6574 dst2[0] = src_r1[3] >> 16;
6575 break;
6576
6577 case 3:
6578 dst0[0] = src_l0[0] | src_r0[0] << 24;
6579 dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
6580 dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
6581 dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
6582 dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
6583 dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
6584 dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
6585 dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24;
6586 dst2[0] = src_r1[3] >> 8;
6587 break;
6588
6589 case 4:
6590 dst0[1] = src_r0[0];
6591 dst0[2] = src_r0[1];
6592 dst0[3] = src_r0[2];
6593 dst1[0] = src_r0[3];
6594 dst1[1] = src_r1[0];
6595 dst1[2] = src_r1[1];
6596 dst1[3] = src_r1[2];
6597 dst2[0] = src_r1[3];
6598 break;
6599
6600 case 5:
6601 dst0[1] = src_l0[1] | src_r0[0] << 8;
6602 dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
6603 dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
6604 dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
6605 dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
6606 dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
6607 dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8;
6608 dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8;
6609 dst2[1] = src_r1[3] >> 24;
6610 break;
6611
6612 case 6:
6613 dst0[1] = src_l0[1] | src_r0[0] << 16;
6614 dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
6615 dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
6616 dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
6617 dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
6618 dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
6619 dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16;
6620 dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16;
6621 dst2[1] = src_r1[3] >> 16;
6622 break;
6623
6624 case 7:
6625 dst0[1] = src_l0[1] | src_r0[0] << 24;
6626 dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
6627 dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
6628 dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
6629 dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
6630 dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
6631 dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24;
6632 dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24;
6633 dst2[1] = src_r1[3] >> 8;
6634 break;
6635
6636 case 8:
6637 dst0[2] = src_r0[0];
6638 dst0[3] = src_r0[1];
6639 dst1[0] = src_r0[2];
6640 dst1[1] = src_r0[3];
6641 dst1[2] = src_r1[0];
6642 dst1[3] = src_r1[1];
6643 dst2[0] = src_r1[2];
6644 dst2[1] = src_r1[3];
6645 break;
6646
6647 case 9:
6648 dst0[2] = src_l0[2] | src_r0[0] << 8;
6649 dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
6650 dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
6651 dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
6652 dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
6653 dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8;
6654 dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8;
6655 dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8;
6656 dst2[2] = src_r1[3] >> 24;
6657 break;
6658
6659 case 10:
6660 dst0[2] = src_l0[2] | src_r0[0] << 16;
6661 dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
6662 dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
6663 dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
6664 dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
6665 dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16;
6666 dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16;
6667 dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16;
6668 dst2[2] = src_r1[3] >> 16;
6669 break;
6670
6671 case 11:
6672 dst0[2] = src_l0[2] | src_r0[0] << 24;
6673 dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
6674 dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
6675 dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
6676 dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
6677 dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24;
6678 dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24;
6679 dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24;
6680 dst2[2] = src_r1[3] >> 8;
6681 break;
6682
6683 case 12:
6684 dst0[3] = src_r0[0];
6685 dst1[0] = src_r0[1];
6686 dst1[1] = src_r0[2];
6687 dst1[2] = src_r0[3];
6688 dst1[3] = src_r1[0];
6689 dst2[0] = src_r1[1];
6690 dst2[1] = src_r1[2];
6691 dst2[2] = src_r1[3];
6692 break;
6693
6694 case 13:
6695 dst0[3] = src_l0[3] | src_r0[0] << 8;
6696 dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
6697 dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
6698 dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
6699 dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8;
6700 dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8;
6701 dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8;
6702 dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8;
6703 dst2[3] = src_r1[3] >> 24;
6704 break;
6705
6706 case 14:
6707 dst0[3] = src_l0[3] | src_r0[0] << 16;
6708 dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
6709 dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
6710 dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
6711 dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16;
6712 dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16;
6713 dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16;
6714 dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16;
6715 dst2[3] = src_r1[3] >> 16;
6716 break;
6717
6718 case 15:
6719 dst0[3] = src_l0[3] | src_r0[0] << 24;
6720 dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
6721 dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
6722 dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
6723 dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24;
6724 dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24;
6725 dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24;
6726 dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24;
6727 dst2[3] = src_r1[3] >> 8;
6728 break;
6729
6730 case 16:
6731 dst1[0] = src_r0[0];
6732 dst1[1] = src_r0[1];
6733 dst1[2] = src_r0[2];
6734 dst1[3] = src_r0[3];
6735 dst2[0] = src_r1[0];
6736 dst2[1] = src_r1[1];
6737 dst2[2] = src_r1[2];
6738 dst2[3] = src_r1[3];
6739 break;
6740
6741 case 17:
6742 dst1[0] = src_l1[0] | src_r0[0] << 8;
6743 dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
6744 dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
6745 dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
6746 dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8;
6747 dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8;
6748 dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8;
6749 dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8;
6750 break;
6751
6752 case 18:
6753 dst1[0] = src_l1[0] | src_r0[0] << 16;
6754 dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
6755 dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
6756 dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
6757 dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16;
6758 dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16;
6759 dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16;
6760 dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16;
6761 break;
6762
6763 case 19:
6764 dst1[0] = src_l1[0] | src_r0[0] << 24;
6765 dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
6766 dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
6767 dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
6768 dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24;
6769 dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24;
6770 dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24;
6771 dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24;
6772 break;
6773
6774 case 20:
6775 dst1[1] = src_r1[0];
6776 dst1[2] = src_r0[1];
6777 dst1[3] = src_r0[2];
6778 dst2[0] = src_r0[3];
6779 dst2[1] = src_r1[0];
6780 dst2[2] = src_r1[1];
6781 dst2[3] = src_r1[2];
6782 break;
6783
6784 case 21:
6785 dst1[1] = src_l1[1] | src_r0[0] << 8;
6786 dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
6787 dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
6788 dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
6789 dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8;
6790 dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8;
6791 dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8;
6792 break;
6793
6794 case 22:
6795 dst1[1] = src_l1[1] | src_r0[0] << 16;
6796 dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
6797 dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
6798 dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
6799 dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16;
6800 dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16;
6801 dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16;
6802 break;
6803
6804 case 23:
6805 dst1[1] = src_l1[1] | src_r0[0] << 24;
6806 dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
6807 dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
6808 dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
6809 dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24;
6810 dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24;
6811 dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24;
6812 break;
6813
6814 case 24:
6815 dst1[2] = src_r1[0];
6816 dst1[3] = src_r0[1];
6817 dst2[0] = src_r0[2];
6818 dst2[1] = src_r0[3];
6819 dst2[2] = src_r1[0];
6820 dst2[3] = src_r1[1];
6821 break;
6822
6823 case 25:
6824 dst1[2] = src_l1[2] | src_r0[0] << 8;
6825 dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
6826 dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
6827 dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
6828 dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8;
6829 dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8;
6830 break;
6831
6832 case 26:
6833 dst1[2] = src_l1[2] | src_r0[0] << 16;
6834 dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
6835 dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
6836 dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
6837 dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16;
6838 dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16;
6839 break;
6840
6841 case 27:
6842 dst1[2] = src_l1[2] | src_r0[0] << 24;
6843 dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
6844 dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
6845 dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
6846 dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24;
6847 dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24;
6848 break;
6849
6850 case 28:
6851 dst1[3] = src_r1[0];
6852 dst2[0] = src_r0[1];
6853 dst2[1] = src_r0[2];
6854 dst2[2] = src_r0[3];
6855 dst2[3] = src_r1[0];
6856 break;
6857
6858 case 29:
6859 dst1[3] = src_l1[3] | src_r0[0] << 8;
6860 dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
6861 dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
6862 dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
6863 dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8;
6864 break;
6865
6866 case 30:
6867 dst1[3] = src_l1[3] | src_r0[0] << 16;
6868 dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
6869 dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
6870 dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
6871 dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16;
6872 break;
6873
6874 case 31:
6875 dst1[3] = src_l1[3] | src_r0[0] << 24;
6876 dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
6877 dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
6878 dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
6879 dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24;
6880 break;
6881
6882 case 32:
6883 dst2[0] = src_r0[0];
6884 dst2[1] = src_r0[1];
6885 dst2[2] = src_r0[2];
6886 dst2[3] = src_r0[3];
6887 break;
6888
6889 case 33:
6890 dst2[0] = src_l2[0] | src_r0[0] << 8;
6891 dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
6892 dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
6893 dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
6894 break;
6895
6896 case 34:
6897 dst2[0] = src_l2[0] | src_r0[0] << 16;
6898 dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
6899 dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
6900 dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
6901 break;
6902
6903 case 35:
6904 dst2[0] = src_l2[0] | src_r0[0] << 24;
6905 dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
6906 dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
6907 dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
6908 break;
6909
6910 case 36:
6911 dst2[1] = src_r0[0];
6912 dst2[2] = src_r0[1];
6913 dst2[3] = src_r0[2];
6914 break;
6915
6916 case 37:
6917 dst2[1] = src_l2[1] | src_r0[0] << 8;
6918 dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
6919 dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
6920 break;
6921
6922 case 38:
6923 dst2[1] = src_l2[1] | src_r0[0] << 16;
6924 dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
6925 dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
6926 break;
6927
6928 case 39:
6929 dst2[1] = src_l2[1] | src_r0[0] << 24;
6930 dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
6931 dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
6932 break;
6933
6934 case 40:
6935 dst2[2] = src_r0[0];
6936 dst2[3] = src_r0[1];
6937 break;
6938
6939 case 41:
6940 dst2[2] = src_l2[2] | src_r0[0] << 8;
6941 dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
6942 break;
6943
6944 case 42:
6945 dst2[2] = src_l2[2] | src_r0[0] << 16;
6946 dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
6947 break;
6948
6949 case 43:
6950 dst2[2] = src_l2[2] | src_r0[0] << 24;
6951 dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
6952 break;
6953
6954 case 44:
6955 dst2[3] = src_r0[0];
6956 break;
6957
6958 case 45:
6959 dst2[3] = src_l2[3] | src_r0[0] << 8;
6960 break;
6961
6962 case 46:
6963 dst2[3] = src_l2[3] | src_r0[0] << 16;
6964 break;
6965
6966 case 47:
6967 dst2[3] = src_l2[3] | src_r0[0] << 24;
6968 break;
6969 }
6970 }
6971
// before: memcat16_9
/**
 * Writes the nine words append0[0..3], append1[0..3], append2[0] into the
 * 16-word buffer formed by w0, w1, w2, w3 (in that order), starting at byte
 * position `offset`. Byte k of a word occupies bits 8k..8k+7.
 *
 * offset % 4 == 0: nine consecutive words are overwritten.
 * offset % 4 != 0: the first word is OR-ed with the low part of append0[0]
 *                  (its current bits are kept), the next eight words are
 *                  overwritten, and a tenth word is overwritten with the
 *                  remaining high bits of append2[0].
 *
 * Only offsets 0..15 are handled; any other value leaves the buffer untouched.
 * append2[1..3] are never read.
 */
static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
{
  if (offset > 15) return;

  // flat views: word n lives in row n / 4, column n % 4

  u32 *const dst[4] = { w0, w1, w2, w3 };

  const u32 *const src[3] = { append0, append1, append2 };

  const u32 first = offset >> 2;       // flat index of the first word touched
  const u32 lsh   = (offset & 3) << 3; // bit position inside that word: 0, 8, 16 or 24

  if (lsh == 0)
  {
    // word aligned: straight copy, no merging with existing content

    for (u32 i = 0; i < 9; i++)
    {
      const u32 pos = first + i;

      dst[pos >> 2][pos & 3] = src[i >> 2][i & 3];
    }

    return;
  }

  const u32 rsh = 32 - lsh;

  // leading word: keep what is already stored there

  dst[first >> 2][first & 3] |= src[0][0] << lsh;

  // middle words: high bits of the previous source word, low bits of the current one

  for (u32 i = 1; i < 9; i++)
  {
    const u32 pos  = first + i;
    const u32 prev = i - 1;

    dst[pos >> 2][pos & 3] = (src[prev >> 2][prev & 3] >> rsh) | (src[i >> 2][i & 3] << lsh);
  }

  // trailing word: whatever is left of append2[0]

  const u32 last = first + 9;

  dst[last >> 2][last & 3] = src[2][0] >> rsh;
}
7182
// before: memcat32_8
/**
 * Writes the eight words append0[0..3], append1[0..3] into the 16-word buffer
 * formed by w0, w1, w2, w3 (in that order), starting at byte position
 * `offset`. Byte k of a word occupies bits 8k..8k+7.
 *
 * offset % 4 == 0: eight consecutive words are overwritten.
 * offset % 4 != 0: the first word is OR-ed with the low part of append0[0]
 *                  (its current bits are kept), the next seven words are
 *                  overwritten, and a ninth word is overwritten with the
 *                  remaining high bits of append1[3].
 *
 * Nothing is ever stored beyond w3[1]: appended data that would land in w3[2]
 * or w3[3] is dropped (presumably callers keep other data there - confirm).
 *
 * Only offsets 0..32 are handled; any other value leaves the buffer untouched.
 */
static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset)
{
  if (offset > 32) return;

  // flat views: word n lives in row n / 4, column n % 4

  u32 *const dst[4] = { w0, w1, w2, w3 };

  const u32 *const src[2] = { append0, append1 };

  const u32 limit = 13;                // flat index of w3[1], the last writable word

  const u32 first = offset >> 2;       // flat index of the first word touched
  const u32 lsh   = (offset & 3) << 3; // bit position inside that word: 0, 8, 16 or 24

  if (lsh == 0)
  {
    // word aligned: straight copy, cut off after w3[1]

    for (u32 i = 0; i < 8; i++)
    {
      const u32 pos = first + i;

      if (pos > limit) break;

      dst[pos >> 2][pos & 3] = src[i >> 2][i & 3];
    }

    return;
  }

  const u32 rsh = 32 - lsh;

  // leading word: keep what is already stored there (first <= 7, always in range)

  dst[first >> 2][first & 3] |= src[0][0] << lsh;

  // middle words: high bits of the previous source word, low bits of the current one

  for (u32 i = 1; i < 8; i++)
  {
    const u32 pos = first + i;

    if (pos > limit) break;

    const u32 prev = i - 1;

    dst[pos >> 2][pos & 3] = (src[prev >> 2][prev & 3] >> rsh) | (src[i >> 2][i & 3] << lsh);
  }

  // trailing word: whatever is left of append1[3], if it still fits

  const u32 last = first + 8;

  if (last <= limit)
  {
    dst[last >> 2][last & 3] = src[1][3] >> rsh;
  }
}
7564
7565 // before: memcat32_9
7566 static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
7567 {
7568 switch (offset)
7569 {
7570 case 0:
7571 w0[0] = append0[0];
7572 w0[1] = append0[1];
7573 w0[2] = append0[2];
7574 w0[3] = append0[3];
7575 w1[0] = append1[0];
7576 w1[1] = append1[1];
7577 w1[2] = append1[2];
7578 w1[3] = append1[3];
7579 w2[0] = append2[0];
7580 break;
7581
7582 case 1:
7583 w0[0] = w0[0] | append0[0] << 8;
7584 w0[1] = append0[0] >> 24 | append0[1] << 8;
7585 w0[2] = append0[1] >> 24 | append0[2] << 8;
7586 w0[3] = append0[2] >> 24 | append0[3] << 8;
7587 w1[0] = append0[3] >> 24 | append1[0] << 8;
7588 w1[1] = append1[0] >> 24 | append1[1] << 8;
7589 w1[2] = append1[1] >> 24 | append1[2] << 8;
7590 w1[3] = append1[2] >> 24 | append1[3] << 8;
7591 w2[0] = append1[3] >> 24 | append2[0] << 8;
7592 w2[1] = append2[0] >> 24;
7593 break;
7594
7595 case 2:
7596 w0[0] = w0[0] | append0[0] << 16;
7597 w0[1] = append0[0] >> 16 | append0[1] << 16;
7598 w0[2] = append0[1] >> 16 | append0[2] << 16;
7599 w0[3] = append0[2] >> 16 | append0[3] << 16;
7600 w1[0] = append0[3] >> 16 | append1[0] << 16;
7601 w1[1] = append1[0] >> 16 | append1[1] << 16;
7602 w1[2] = append1[1] >> 16 | append1[2] << 16;
7603 w1[3] = append1[2] >> 16 | append1[3] << 16;
7604 w2[0] = append1[3] >> 16 | append2[0] << 16;
7605 w2[1] = append2[0] >> 16;
7606 break;
7607
7608 case 3:
7609 w0[0] = w0[0] | append0[0] << 24;
7610 w0[1] = append0[0] >> 8 | append0[1] << 24;
7611 w0[2] = append0[1] >> 8 | append0[2] << 24;
7612 w0[3] = append0[2] >> 8 | append0[3] << 24;
7613 w1[0] = append0[3] >> 8 | append1[0] << 24;
7614 w1[1] = append1[0] >> 8 | append1[1] << 24;
7615 w1[2] = append1[1] >> 8 | append1[2] << 24;
7616 w1[3] = append1[2] >> 8 | append1[3] << 24;
7617 w2[0] = append1[3] >> 8 | append2[0] << 24;
7618 w2[1] = append2[0] >> 8;
7619 break;
7620
7621 case 4:
7622 w0[1] = append0[0];
7623 w0[2] = append0[1];
7624 w0[3] = append0[2];
7625 w1[0] = append0[3];
7626 w1[1] = append1[0];
7627 w1[2] = append1[1];
7628 w1[3] = append1[2];
7629 w2[0] = append1[3];
7630 w2[1] = append2[0];
7631 break;
7632
7633 case 5:
7634 w0[1] = w0[1] | append0[0] << 8;
7635 w0[2] = append0[0] >> 24 | append0[1] << 8;
7636 w0[3] = append0[1] >> 24 | append0[2] << 8;
7637 w1[0] = append0[2] >> 24 | append0[3] << 8;
7638 w1[1] = append0[3] >> 24 | append1[0] << 8;
7639 w1[2] = append1[0] >> 24 | append1[1] << 8;
7640 w1[3] = append1[1] >> 24 | append1[2] << 8;
7641 w2[0] = append1[2] >> 24 | append1[3] << 8;
7642 w2[1] = append1[3] >> 24 | append2[0] << 8;
7643 w2[2] = append2[0] >> 24;
7644 break;
7645
7646 case 6:
7647 w0[1] = w0[1] | append0[0] << 16;
7648 w0[2] = append0[0] >> 16 | append0[1] << 16;
7649 w0[3] = append0[1] >> 16 | append0[2] << 16;
7650 w1[0] = append0[2] >> 16 | append0[3] << 16;
7651 w1[1] = append0[3] >> 16 | append1[0] << 16;
7652 w1[2] = append1[0] >> 16 | append1[1] << 16;
7653 w1[3] = append1[1] >> 16 | append1[2] << 16;
7654 w2[0] = append1[2] >> 16 | append1[3] << 16;
7655 w2[1] = append1[3] >> 16 | append2[0] << 16;
7656 w2[2] = append2[0] >> 16;
7657 break;
7658
7659 case 7:
7660 w0[1] = w0[1] | append0[0] << 24;
7661 w0[2] = append0[0] >> 8 | append0[1] << 24;
7662 w0[3] = append0[1] >> 8 | append0[2] << 24;
7663 w1[0] = append0[2] >> 8 | append0[3] << 24;
7664 w1[1] = append0[3] >> 8 | append1[0] << 24;
7665 w1[2] = append1[0] >> 8 | append1[1] << 24;
7666 w1[3] = append1[1] >> 8 | append1[2] << 24;
7667 w2[0] = append1[2] >> 8 | append1[3] << 24;
7668 w2[1] = append1[3] >> 8 | append2[0] << 24;
7669 w2[2] = append2[0] >> 8;
7670 break;
7671
7672 case 8:
7673 w0[2] = append0[0];
7674 w0[3] = append0[1];
7675 w1[0] = append0[2];
7676 w1[1] = append0[3];
7677 w1[2] = append1[0];
7678 w1[3] = append1[1];
7679 w2[0] = append1[2];
7680 w2[1] = append1[3];
7681 w2[2] = append2[0];
7682 break;
7683
7684 case 9:
7685 w0[2] = w0[2] | append0[0] << 8;
7686 w0[3] = append0[0] >> 24 | append0[1] << 8;
7687 w1[0] = append0[1] >> 24 | append0[2] << 8;
7688 w1[1] = append0[2] >> 24 | append0[3] << 8;
7689 w1[2] = append0[3] >> 24 | append1[0] << 8;
7690 w1[3] = append1[0] >> 24 | append1[1] << 8;
7691 w2[0] = append1[1] >> 24 | append1[2] << 8;
7692 w2[1] = append1[2] >> 24 | append1[3] << 8;
7693 w2[2] = append1[3] >> 24 | append2[0] << 8;
7694 w2[3] = append2[0] >> 24;
7695 break;
7696
7697 case 10:
7698 w0[2] = w0[2] | append0[0] << 16;
7699 w0[3] = append0[0] >> 16 | append0[1] << 16;
7700 w1[0] = append0[1] >> 16 | append0[2] << 16;
7701 w1[1] = append0[2] >> 16 | append0[3] << 16;
7702 w1[2] = append0[3] >> 16 | append1[0] << 16;
7703 w1[3] = append1[0] >> 16 | append1[1] << 16;
7704 w2[0] = append1[1] >> 16 | append1[2] << 16;
7705 w2[1] = append1[2] >> 16 | append1[3] << 16;
7706 w2[2] = append1[3] >> 16 | append2[0] << 16;
7707 w2[3] = append2[0] >> 16;
7708 break;
7709
7710 case 11:
7711 w0[2] = w0[2] | append0[0] << 24;
7712 w0[3] = append0[0] >> 8 | append0[1] << 24;
7713 w1[0] = append0[1] >> 8 | append0[2] << 24;
7714 w1[1] = append0[2] >> 8 | append0[3] << 24;
7715 w1[2] = append0[3] >> 8 | append1[0] << 24;
7716 w1[3] = append1[0] >> 8 | append1[1] << 24;
7717 w2[0] = append1[1] >> 8 | append1[2] << 24;
7718 w2[1] = append1[2] >> 8 | append1[3] << 24;
7719 w2[2] = append1[3] >> 8 | append2[0] << 24;
7720 w2[3] = append2[0] >> 8;
7721 break;
7722
7723 case 12:
7724 w0[3] = append0[0];
7725 w1[0] = append0[1];
7726 w1[1] = append0[2];
7727 w1[2] = append0[3];
7728 w1[3] = append1[0];
7729 w2[0] = append1[1];
7730 w2[1] = append1[2];
7731 w2[2] = append1[3];
7732 w2[3] = append2[0];
7733 break;
7734
7735 case 13:
7736 w0[3] = w0[3] | append0[0] << 8;
7737 w1[0] = append0[0] >> 24 | append0[1] << 8;
7738 w1[1] = append0[1] >> 24 | append0[2] << 8;
7739 w1[2] = append0[2] >> 24 | append0[3] << 8;
7740 w1[3] = append0[3] >> 24 | append1[0] << 8;
7741 w2[0] = append1[0] >> 24 | append1[1] << 8;
7742 w2[1] = append1[1] >> 24 | append1[2] << 8;
7743 w2[2] = append1[2] >> 24 | append1[3] << 8;
7744 w2[3] = append1[3] >> 24 | append2[0] << 8;
7745 w3[0] = append2[0] >> 24;
7746 break;
7747
7748 case 14:
7749 w0[3] = w0[3] | append0[0] << 16;
7750 w1[0] = append0[0] >> 16 | append0[1] << 16;
7751 w1[1] = append0[1] >> 16 | append0[2] << 16;
7752 w1[2] = append0[2] >> 16 | append0[3] << 16;
7753 w1[3] = append0[3] >> 16 | append1[0] << 16;
7754 w2[0] = append1[0] >> 16 | append1[1] << 16;
7755 w2[1] = append1[1] >> 16 | append1[2] << 16;
7756 w2[2] = append1[2] >> 16 | append1[3] << 16;
7757 w2[3] = append1[3] >> 16 | append2[0] << 16;
7758 w3[0] = append2[0] >> 16;
7759 break;
7760
7761 case 15:
7762 w0[3] = w0[3] | append0[0] << 24;
7763 w1[0] = append0[0] >> 8 | append0[1] << 24;
7764 w1[1] = append0[1] >> 8 | append0[2] << 24;
7765 w1[2] = append0[2] >> 8 | append0[3] << 24;
7766 w1[3] = append0[3] >> 8 | append1[0] << 24;
7767 w2[0] = append1[0] >> 8 | append1[1] << 24;
7768 w2[1] = append1[1] >> 8 | append1[2] << 24;
7769 w2[2] = append1[2] >> 8 | append1[3] << 24;
7770 w2[3] = append1[3] >> 8 | append2[0] << 24;
7771 w3[0] = append2[0] >> 8;
7772 break;
7773
7774 case 16:
7775 w1[0] = append0[0];
7776 w1[1] = append0[1];
7777 w1[2] = append0[2];
7778 w1[3] = append0[3];
7779 w2[0] = append1[0];
7780 w2[1] = append1[1];
7781 w2[2] = append1[2];
7782 w2[3] = append1[3];
7783 w3[0] = append2[0];
7784 break;
7785
7786 case 17:
7787 w1[0] = w1[0] | append0[0] << 8;
7788 w1[1] = append0[0] >> 24 | append0[1] << 8;
7789 w1[2] = append0[1] >> 24 | append0[2] << 8;
7790 w1[3] = append0[2] >> 24 | append0[3] << 8;
7791 w2[0] = append0[3] >> 24 | append1[0] << 8;
7792 w2[1] = append1[0] >> 24 | append1[1] << 8;
7793 w2[2] = append1[1] >> 24 | append1[2] << 8;
7794 w2[3] = append1[2] >> 24 | append1[3] << 8;
7795 w3[0] = append1[3] >> 24 | append2[0] << 8;
7796 w3[1] = append2[0] >> 24;
7797 break;
7798
7799 case 18:
7800 w1[0] = w1[0] | append0[0] << 16;
7801 w1[1] = append0[0] >> 16 | append0[1] << 16;
7802 w1[2] = append0[1] >> 16 | append0[2] << 16;
7803 w1[3] = append0[2] >> 16 | append0[3] << 16;
7804 w2[0] = append0[3] >> 16 | append1[0] << 16;
7805 w2[1] = append1[0] >> 16 | append1[1] << 16;
7806 w2[2] = append1[1] >> 16 | append1[2] << 16;
7807 w2[3] = append1[2] >> 16 | append1[3] << 16;
7808 w3[0] = append1[3] >> 16 | append2[0] << 16;
7809 w3[1] = append2[0] >> 16;
7810 break;
7811
7812 case 19:
7813 w1[0] = w1[0] | append0[0] << 24;
7814 w1[1] = append0[0] >> 8 | append0[1] << 24;
7815 w1[2] = append0[1] >> 8 | append0[2] << 24;
7816 w1[3] = append0[2] >> 8 | append0[3] << 24;
7817 w2[0] = append0[3] >> 8 | append1[0] << 24;
7818 w2[1] = append1[0] >> 8 | append1[1] << 24;
7819 w2[2] = append1[1] >> 8 | append1[2] << 24;
7820 w2[3] = append1[2] >> 8 | append1[3] << 24;
7821 w3[0] = append1[3] >> 8 | append2[0] << 24;
7822 w3[1] = append2[0] >> 8;
7823 break;
7824
7825 case 20:
7826 w1[1] = append0[0];
7827 w1[2] = append0[1];
7828 w1[3] = append0[2];
7829 w2[0] = append0[3];
7830 w2[1] = append1[0];
7831 w2[2] = append1[1];
7832 w2[3] = append1[2];
7833 w3[0] = append1[3];
7834 w3[1] = append2[0];
7835 break;
7836
7837 case 21:
7838 w1[1] = w1[1] | append0[0] << 8;
7839 w1[2] = append0[0] >> 24 | append0[1] << 8;
7840 w1[3] = append0[1] >> 24 | append0[2] << 8;
7841 w2[0] = append0[2] >> 24 | append0[3] << 8;
7842 w2[1] = append0[3] >> 24 | append1[0] << 8;
7843 w2[2] = append1[0] >> 24 | append1[1] << 8;
7844 w2[3] = append1[1] >> 24 | append1[2] << 8;
7845 w3[0] = append1[2] >> 24 | append1[3] << 8;
7846 w3[1] = append1[3] >> 24 | append2[0] << 8;
7847 break;
7848
7849 case 22:
7850 w1[1] = w1[1] | append0[0] << 16;
7851 w1[2] = append0[0] >> 16 | append0[1] << 16;
7852 w1[3] = append0[1] >> 16 | append0[2] << 16;
7853 w2[0] = append0[2] >> 16 | append0[3] << 16;
7854 w2[1] = append0[3] >> 16 | append1[0] << 16;
7855 w2[2] = append1[0] >> 16 | append1[1] << 16;
7856 w2[3] = append1[1] >> 16 | append1[2] << 16;
7857 w3[0] = append1[2] >> 16 | append1[3] << 16;
7858 w3[1] = append1[3] >> 16 | append2[0] << 16;
7859 break;
7860
7861 case 23:
7862 w1[1] = w1[1] | append0[0] << 24;
7863 w1[2] = append0[0] >> 8 | append0[1] << 24;
7864 w1[3] = append0[1] >> 8 | append0[2] << 24;
7865 w2[0] = append0[2] >> 8 | append0[3] << 24;
7866 w2[1] = append0[3] >> 8 | append1[0] << 24;
7867 w2[2] = append1[0] >> 8 | append1[1] << 24;
7868 w2[3] = append1[1] >> 8 | append1[2] << 24;
7869 w3[0] = append1[2] >> 8 | append1[3] << 24;
7870 w3[1] = append1[3] >> 8 | append2[0] << 24;
7871 break;
7872
7873 case 24:
7874 w1[2] = append0[0];
7875 w1[3] = append0[1];
7876 w2[0] = append0[2];
7877 w2[1] = append0[3];
7878 w2[2] = append1[0];
7879 w2[3] = append1[1];
7880 w3[0] = append1[2];
7881 w3[1] = append1[3];
7882 break;
7883
7884 case 25:
7885 w1[2] = w1[2] | append0[0] << 8;
7886 w1[3] = append0[0] >> 24 | append0[1] << 8;
7887 w2[0] = append0[1] >> 24 | append0[2] << 8;
7888 w2[1] = append0[2] >> 24 | append0[3] << 8;
7889 w2[2] = append0[3] >> 24 | append1[0] << 8;
7890 w2[3] = append1[0] >> 24 | append1[1] << 8;
7891 w3[0] = append1[1] >> 24 | append1[2] << 8;
7892 w3[1] = append1[2] >> 24 | append1[3] << 8;
7893 break;
7894
7895 case 26:
7896 w1[2] = w1[2] | append0[0] << 16;
7897 w1[3] = append0[0] >> 16 | append0[1] << 16;
7898 w2[0] = append0[1] >> 16 | append0[2] << 16;
7899 w2[1] = append0[2] >> 16 | append0[3] << 16;
7900 w2[2] = append0[3] >> 16 | append1[0] << 16;
7901 w2[3] = append1[0] >> 16 | append1[1] << 16;
7902 w3[0] = append1[1] >> 16 | append1[2] << 16;
7903 w3[1] = append1[2] >> 16 | append1[3] << 16;
7904 break;
7905
7906 case 27:
7907 w1[2] = w1[2] | append0[0] << 24;
7908 w1[3] = append0[0] >> 8 | append0[1] << 24;
7909 w2[0] = append0[1] >> 8 | append0[2] << 24;
7910 w2[1] = append0[2] >> 8 | append0[3] << 24;
7911 w2[2] = append0[3] >> 8 | append1[0] << 24;
7912 w2[3] = append1[0] >> 8 | append1[1] << 24;
7913 w3[0] = append1[1] >> 8 | append1[2] << 24;
7914 w3[1] = append1[2] >> 8 | append1[3] << 24;
7915 break;
7916
7917 case 28:
7918 w1[3] = append0[0];
7919 w2[0] = append0[1];
7920 w2[1] = append0[2];
7921 w2[2] = append0[3];
7922 w2[3] = append1[0];
7923 w3[0] = append1[1];
7924 w3[1] = append1[2];
7925 break;
7926
7927 case 29:
7928 w1[3] = w1[3] | append0[0] << 8;
7929 w2[0] = append0[0] >> 24 | append0[1] << 8;
7930 w2[1] = append0[1] >> 24 | append0[2] << 8;
7931 w2[2] = append0[2] >> 24 | append0[3] << 8;
7932 w2[3] = append0[3] >> 24 | append1[0] << 8;
7933 w3[0] = append1[0] >> 24 | append1[1] << 8;
7934 w3[1] = append1[1] >> 24 | append1[2] << 8;
7935 break;
7936
7937 case 30:
7938 w1[3] = w1[3] | append0[0] << 16;
7939 w2[0] = append0[0] >> 16 | append0[1] << 16;
7940 w2[1] = append0[1] >> 16 | append0[2] << 16;
7941 w2[2] = append0[2] >> 16 | append0[3] << 16;
7942 w2[3] = append0[3] >> 16 | append1[0] << 16;
7943 w3[0] = append1[0] >> 16 | append1[1] << 16;
7944 w3[1] = append1[1] >> 16 | append1[2] << 16;
7945 break;
7946
7947 case 31:
7948 w1[3] = w1[3] | append0[0] << 24;
7949 w2[0] = append0[0] >> 8 | append0[1] << 24;
7950 w2[1] = append0[1] >> 8 | append0[2] << 24;
7951 w2[2] = append0[2] >> 8 | append0[3] << 24;
7952 w2[3] = append0[3] >> 8 | append1[0] << 24;
7953 w3[0] = append1[0] >> 8 | append1[1] << 24;
7954 w3[1] = append1[1] >> 8 | append1[2] << 24;
7955 break;
7956
7957 case 32:
7958 w2[0] = append0[0];
7959 w2[1] = append0[1];
7960 w2[2] = append0[2];
7961 w2[3] = append0[3];
7962 w3[0] = append1[0];
7963 w3[1] = append1[1];
7964 break;
7965 }
7966 }
7967
7968 */