2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "OpenCL/types_ocl.c"
18 #include "OpenCL/common.c"
20 __constant u32 te0[256] =
22 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
23 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
24 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
25 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
26 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
27 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
28 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
29 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
30 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
31 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
32 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
33 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
34 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
35 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
36 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
37 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
38 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
39 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
40 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
41 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
42 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
43 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
44 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
45 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
46 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
47 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
48 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
49 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
50 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
51 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
52 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
53 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
54 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
55 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
56 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
57 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
58 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
59 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
60 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
61 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
62 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
63 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
64 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
65 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
66 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
67 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
68 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
69 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
70 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
71 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
72 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
73 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
74 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
75 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
76 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
77 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
78 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
79 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
80 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
81 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
82 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
83 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
84 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
85 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
88 __constant u32 te1[256] =
90 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
91 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
92 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
93 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
94 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
95 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
96 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
97 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
98 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
99 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
100 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
101 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
102 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
103 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
104 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
105 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
106 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
107 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
108 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
109 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
110 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
111 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
112 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
113 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
114 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
115 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
116 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
117 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
118 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
119 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
120 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
121 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
122 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
123 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
124 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
125 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
126 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
127 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
128 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
129 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
130 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
131 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
132 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
133 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
134 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
135 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
136 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
137 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
138 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
139 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
140 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
141 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
142 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
143 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
144 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
145 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
146 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
147 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
148 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
149 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
150 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
151 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
152 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
153 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
156 __constant u32 te2[256] =
158 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
159 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
160 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
161 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
162 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
163 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
164 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
165 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
166 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
167 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
168 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
169 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
170 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
171 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
172 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
173 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
174 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
175 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
176 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
177 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
178 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
179 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
180 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
181 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
182 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
183 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
184 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
185 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
186 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
187 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
188 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
189 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
190 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
191 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
192 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
193 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
194 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
195 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
196 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
197 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
198 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
199 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
200 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
201 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
202 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
203 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
204 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
205 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
206 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
207 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
208 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
209 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
210 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
211 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
212 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
213 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
214 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
215 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
216 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
217 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
218 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
219 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
220 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
221 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
224 __constant u32 te3[256] =
226 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
227 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
228 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
229 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
230 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
231 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
232 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
233 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
234 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
235 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
236 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
237 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
238 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
239 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
240 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
241 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
242 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
243 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
244 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
245 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
246 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
247 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
248 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
249 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
250 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
251 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
252 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
253 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
254 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
255 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
256 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
257 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
258 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
259 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
260 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
261 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
262 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
263 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
264 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
265 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
266 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
267 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
268 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
269 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
270 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
271 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
272 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
273 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
274 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
275 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
276 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
277 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
278 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
279 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
280 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
281 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
282 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
283 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
284 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
285 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
286 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
287 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
288 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
289 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
292 __constant u32 te4[256] =
294 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
295 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
296 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
297 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
298 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
299 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
300 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
301 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
302 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
303 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
304 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
305 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
306 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
307 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
308 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
309 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
310 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
311 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
312 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
313 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
314 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
315 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
316 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
317 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
318 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
319 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
320 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
321 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
322 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
323 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
324 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
325 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
326 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
327 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
328 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
329 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
330 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
331 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
332 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
333 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
334 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
335 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
336 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
337 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
338 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
339 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
340 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
341 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
342 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
343 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
344 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
345 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
346 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
347 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
348 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
349 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
350 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
351 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
352 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
353 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
354 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
355 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
356 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
357 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
360 __constant u32 td0[256] =
362 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
363 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
364 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
365 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
366 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
367 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
368 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
369 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
370 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
371 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
372 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
373 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
374 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
375 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
376 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
377 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
378 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
379 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
380 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
381 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
382 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
383 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
384 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
385 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
386 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
387 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
388 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
389 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
390 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
391 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
392 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
393 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
394 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
395 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
396 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
397 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
398 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
399 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
400 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
401 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
402 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
403 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
404 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
405 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
406 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
407 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
408 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
409 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
410 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
411 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
412 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
413 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
414 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
415 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
416 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
417 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
418 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
419 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
420 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
421 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
422 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
423 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
424 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
425 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
428 __constant u32 td1[256] =
430 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
431 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
432 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
433 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
434 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
435 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
436 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
437 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
438 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
439 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
440 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
441 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
442 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
443 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
444 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
445 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
446 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
447 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
448 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
449 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
450 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
451 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
452 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
453 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
454 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
455 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
456 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
457 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
458 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
459 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
460 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
461 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
462 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
463 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
464 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
465 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
466 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
467 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
468 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
469 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
470 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
471 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
472 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
473 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
474 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
475 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
476 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
477 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
478 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
479 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
480 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
481 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
482 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
483 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
484 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
485 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
486 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
487 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
488 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
489 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
490 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
491 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
492 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
493 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
496 __constant u32 td2[256] =
498 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
499 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
500 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
501 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
502 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
503 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
504 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
505 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
506 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
507 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
508 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
509 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
510 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
511 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
512 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
513 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
514 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
515 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
516 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
517 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
518 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
519 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
520 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
521 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
522 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
523 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
524 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
525 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
526 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
527 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
528 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
529 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
530 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
531 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
532 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
533 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
534 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
535 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
536 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
537 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
538 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
539 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
540 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
541 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
542 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
543 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
544 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
545 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
546 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
547 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
548 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
549 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
550 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
551 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
552 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
553 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
554 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
555 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
556 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
557 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
558 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
559 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
560 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
561 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
564 __constant u32 td3[256] =
566 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
567 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
568 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
569 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
570 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
571 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
572 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
573 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
574 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
575 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
576 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
577 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
578 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
579 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
580 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
581 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
582 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
583 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
584 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
585 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
586 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
587 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
588 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
589 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
590 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
591 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
592 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
593 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
594 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
595 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
596 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
597 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
598 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
599 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
600 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
601 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
602 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
603 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
604 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
605 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
606 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
607 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
608 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
609 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
610 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
611 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
612 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
613 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
614 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
615 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
616 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
617 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
618 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
619 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
620 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
621 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
622 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
623 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
624 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
625 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
626 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
627 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
628 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
629 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
632 __constant u32 td4[256] =
634 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
635 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
636 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
637 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
638 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
639 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
640 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
641 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
642 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
643 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
644 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
645 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
646 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
647 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
648 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
649 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
650 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
651 0x86868686, 0x68686868, 0x98989898, 0x16161616,
652 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
653 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
654 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
655 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
656 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
657 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
658 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
659 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
660 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
661 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
662 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
663 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
664 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
665 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
666 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
667 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
668 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
669 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
670 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
671 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
672 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
673 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
674 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
675 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
676 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
677 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
678 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
679 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
680 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
681 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
682 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
683 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
684 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
685 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
686 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
687 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
688 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
689 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
690 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
691 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
692 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
693 0x83838383, 0x53535353, 0x99999999, 0x61616161,
694 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
695 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
696 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
697 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
700 __constant u32 rcon[] =
702 0x01000000, 0x02000000, 0x04000000, 0x08000000,
703 0x10000000, 0x20000000, 0x40000000, 0x80000000,
704 0x1b000000, 0x36000000,
707 static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
714 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
716 u32 temp = rek[j + 3];
718 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
719 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
720 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
721 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
723 rek[j + 4] = rek[j + 0]
727 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
728 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
729 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
733 static void AES128_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
735 for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
739 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
740 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
741 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
742 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
745 for (u32 i = 1, j = 4; i < 10; i += 1, j += 4)
748 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
749 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
750 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
751 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
754 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
755 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
756 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
757 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
760 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
761 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
762 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
763 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
766 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
767 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
768 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
769 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
773 static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
775 u32 s0 = in[0] ^ rdk[0];
776 u32 s1 = in[1] ^ rdk[1];
777 u32 s2 = in[2] ^ rdk[2];
778 u32 s3 = in[3] ^ rdk[3];
785 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
786 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
787 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
788 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
789 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
790 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
791 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
792 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
793 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
794 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
795 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
796 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
797 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
798 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
799 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
800 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
801 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
802 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
803 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
804 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
805 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
806 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
807 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
808 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
809 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
810 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
811 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
812 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
813 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
814 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
815 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
816 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
817 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
818 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
819 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
820 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
822 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
823 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
824 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
825 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
828 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
829 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
830 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
831 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
834 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
835 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
836 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
837 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
840 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
841 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
842 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
843 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
847 static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
868 u32 temp = rek[j + 7];
870 rek[j + 8] = rek[j + 0]
871 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
872 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
873 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
874 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
877 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
878 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
879 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
889 rek[j + 12] = rek[j + 4]
890 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
891 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
892 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
893 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
895 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
896 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
897 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
903 static void AES256_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
905 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
909 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
910 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
911 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
912 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
915 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
918 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
919 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
920 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
921 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
924 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
925 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
926 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
927 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
930 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
931 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
932 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
933 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
936 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
937 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
938 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
939 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
943 static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
945 u32 s0 = in[0] ^ rdk[0];
946 u32 s1 = in[1] ^ rdk[1];
947 u32 s2 = in[2] ^ rdk[2];
948 u32 s3 = in[3] ^ rdk[3];
955 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
956 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
957 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
958 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
959 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
960 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
961 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
962 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
963 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
964 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
965 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
966 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
967 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
968 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
969 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
970 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
971 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
972 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
973 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
974 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
975 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
976 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
977 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
978 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
979 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
980 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
981 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
982 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
983 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
984 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
985 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
986 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
987 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
988 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
989 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
990 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
991 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
992 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
993 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
994 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
995 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
996 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
997 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
998 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
999 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
1000 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
1001 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
1002 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
1003 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
1004 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
1005 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
1006 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
1008 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
1009 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
1010 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
1011 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
1014 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
1015 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
1016 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
1017 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
1020 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
1021 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
1022 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
1023 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
1026 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
1027 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
1028 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
1029 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
1033 static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
1035 u32 s0 = in[0] ^ rek[0];
1036 u32 s1 = in[1] ^ rek[1];
1037 u32 s2 = in[2] ^ rek[2];
1038 u32 s3 = in[3] ^ rek[3];
1045 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
1046 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
1047 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
1048 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
1049 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
1050 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
1051 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
1052 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
1053 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
1054 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
1055 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
1056 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
1057 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
1058 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
1059 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
1060 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
1061 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
1062 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
1063 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
1064 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
1065 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
1066 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
1067 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
1068 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
1069 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
1070 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
1071 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
1072 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
1073 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
1074 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
1075 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
1076 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
1077 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
1078 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
1079 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
1080 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
1081 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40];
1082 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41];
1083 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42];
1084 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43];
1085 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44];
1086 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45];
1087 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46];
1088 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47];
1089 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48];
1090 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49];
1091 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50];
1092 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51];
1093 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52];
1094 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53];
1095 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54];
1096 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55];
1098 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
1099 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
1100 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
1101 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
1104 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
1105 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
1106 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
1107 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
1110 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
1111 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
1112 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
1113 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
1116 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
1117 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
1118 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
1119 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
1123 __constant u32 k_sha256[64] =
1125 SHA256C00, SHA256C01, SHA256C02, SHA256C03,
1126 SHA256C04, SHA256C05, SHA256C06, SHA256C07,
1127 SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
1128 SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
1129 SHA256C10, SHA256C11, SHA256C12, SHA256C13,
1130 SHA256C14, SHA256C15, SHA256C16, SHA256C17,
1131 SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
1132 SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
1133 SHA256C20, SHA256C21, SHA256C22, SHA256C23,
1134 SHA256C24, SHA256C25, SHA256C26, SHA256C27,
1135 SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
1136 SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
1137 SHA256C30, SHA256C31, SHA256C32, SHA256C33,
1138 SHA256C34, SHA256C35, SHA256C36, SHA256C37,
1139 SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
1140 SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
1143 static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8])
1171 #define ROUND_EXPAND() \
1173 w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
1174 w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
1175 w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
1176 w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
1177 w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
1178 w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
1179 w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
1180 w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
1181 w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
1182 w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
1183 wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
1184 wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
1185 wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
1186 wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
1187 we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
1188 wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
1191 #define ROUND_STEP(i) \
1193 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
1194 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
1195 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
1196 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
1197 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
1198 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
1199 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
1200 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
1201 SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
1202 SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
1203 SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
1204 SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
1205 SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
1206 SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
1207 SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
1208 SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
1214 for (int i = 16; i < 64; i += 16)
1216 ROUND_EXPAND (); ROUND_STEP (i);
1229 static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
1257 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
1258 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
1259 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
1260 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
1261 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
1262 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
1263 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
1264 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
1265 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
1266 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
1267 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
1268 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
1269 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
1270 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
1271 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
1272 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
1273 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
1274 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
1275 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
1276 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
1281 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
1282 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
1283 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
1284 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
1285 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
1286 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
1287 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
1288 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
1289 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
1290 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
1291 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
1292 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
1293 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
1294 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
1295 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
1296 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
1297 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
1298 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
1299 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
1300 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
1305 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
1306 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
1307 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
1308 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
1309 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
1310 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
1311 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
1312 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
1313 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
1314 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
1315 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
1316 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
1317 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
1318 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
1319 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
1320 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
1321 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
1322 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
1323 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
1324 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
1329 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
1330 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
1331 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
1332 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
1333 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
1334 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
1335 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
1336 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
1337 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
1338 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
1339 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
1340 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
1341 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
1342 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
1343 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
1344 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
1345 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
1346 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
1347 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
1348 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
1357 static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5])
1359 w0[0] = w0[0] ^ 0x36363636;
1360 w0[1] = w0[1] ^ 0x36363636;
1361 w0[2] = w0[2] ^ 0x36363636;
1362 w0[3] = w0[3] ^ 0x36363636;
1363 w1[0] = w1[0] ^ 0x36363636;
1364 w1[1] = w1[1] ^ 0x36363636;
1365 w1[2] = w1[2] ^ 0x36363636;
1366 w1[3] = w1[3] ^ 0x36363636;
1367 w2[0] = w2[0] ^ 0x36363636;
1368 w2[1] = w2[1] ^ 0x36363636;
1369 w2[2] = w2[2] ^ 0x36363636;
1370 w2[3] = w2[3] ^ 0x36363636;
1371 w3[0] = w3[0] ^ 0x36363636;
1372 w3[1] = w3[1] ^ 0x36363636;
1373 w3[2] = w3[2] ^ 0x36363636;
1374 w3[3] = w3[3] ^ 0x36363636;
1382 sha1_transform (w0, w1, w2, w3, ipad);
1384 w0[0] = w0[0] ^ 0x6a6a6a6a;
1385 w0[1] = w0[1] ^ 0x6a6a6a6a;
1386 w0[2] = w0[2] ^ 0x6a6a6a6a;
1387 w0[3] = w0[3] ^ 0x6a6a6a6a;
1388 w1[0] = w1[0] ^ 0x6a6a6a6a;
1389 w1[1] = w1[1] ^ 0x6a6a6a6a;
1390 w1[2] = w1[2] ^ 0x6a6a6a6a;
1391 w1[3] = w1[3] ^ 0x6a6a6a6a;
1392 w2[0] = w2[0] ^ 0x6a6a6a6a;
1393 w2[1] = w2[1] ^ 0x6a6a6a6a;
1394 w2[2] = w2[2] ^ 0x6a6a6a6a;
1395 w2[3] = w2[3] ^ 0x6a6a6a6a;
1396 w3[0] = w3[0] ^ 0x6a6a6a6a;
1397 w3[1] = w3[1] ^ 0x6a6a6a6a;
1398 w3[2] = w3[2] ^ 0x6a6a6a6a;
1399 w3[3] = w3[3] ^ 0x6a6a6a6a;
1407 sha1_transform (w0, w1, w2, w3, opad);
1410 static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5])
1412 digest[0] = ipad[0];
1413 digest[1] = ipad[1];
1414 digest[2] = ipad[2];
1415 digest[3] = ipad[3];
1416 digest[4] = ipad[4];
1418 sha1_transform (w0, w1, w2, w3, digest);
1435 w3[3] = (64 + 20) * 8;
1437 digest[0] = opad[0];
1438 digest[1] = opad[1];
1439 digest[2] = opad[2];
1440 digest[3] = opad[3];
1441 digest[4] = opad[4];
1443 sha1_transform (w0, w1, w2, w3, digest);
1446 __kernel void m08800_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global androidfde_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global androidfde_t *androidfde_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1452 const u32 gid = get_global_id (0);
1454 if (gid >= gid_max) return;
1458 w0[0] = pws[gid].i[ 0];
1459 w0[1] = pws[gid].i[ 1];
1460 w0[2] = pws[gid].i[ 2];
1461 w0[3] = pws[gid].i[ 3];
1465 w1[0] = pws[gid].i[ 4];
1466 w1[1] = pws[gid].i[ 5];
1467 w1[2] = pws[gid].i[ 6];
1468 w1[3] = pws[gid].i[ 7];
1472 w2[0] = pws[gid].i[ 8];
1473 w2[1] = pws[gid].i[ 9];
1474 w2[2] = pws[gid].i[10];
1475 w2[3] = pws[gid].i[11];
1479 w3[0] = pws[gid].i[12];
1480 w3[1] = pws[gid].i[13];
1481 w3[2] = pws[gid].i[14];
1482 w3[3] = pws[gid].i[15];
1492 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1493 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1494 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1495 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1501 w0[0] = swap32 (w0[0]);
1502 w0[1] = swap32 (w0[1]);
1503 w0[2] = swap32 (w0[2]);
1504 w0[3] = swap32 (w0[3]);
1505 w1[0] = swap32 (w1[0]);
1506 w1[1] = swap32 (w1[1]);
1507 w1[2] = swap32 (w1[2]);
1508 w1[3] = swap32 (w1[3]);
1509 w2[0] = swap32 (w2[0]);
1510 w2[1] = swap32 (w2[1]);
1511 w2[2] = swap32 (w2[2]);
1512 w2[3] = swap32 (w2[3]);
1513 w3[0] = swap32 (w3[0]);
1514 w3[1] = swap32 (w3[1]);
1515 w3[2] = swap32 (w3[2]);
1516 w3[3] = swap32 (w3[3]);
1521 hmac_sha1_pad (w0, w1, w2, w3, ipad, opad);
1523 tmps[gid].ipad[0] = ipad[0];
1524 tmps[gid].ipad[1] = ipad[1];
1525 tmps[gid].ipad[2] = ipad[2];
1526 tmps[gid].ipad[3] = ipad[3];
1527 tmps[gid].ipad[4] = ipad[4];
1529 tmps[gid].opad[0] = opad[0];
1530 tmps[gid].opad[1] = opad[1];
1531 tmps[gid].opad[2] = opad[2];
1532 tmps[gid].opad[3] = opad[3];
1533 tmps[gid].opad[4] = opad[4];
1535 for (u32 i = 0, j = 1; i < 8; i += 5, j += 1)
1537 w0[0] = salt_buf[0];
1538 w0[1] = salt_buf[1];
1539 w0[2] = salt_buf[2];
1540 w0[3] = salt_buf[3];
1555 append_0x01_3x4 (w0, w1, w2, salt_len + 3);
1557 append_0x02_3x4 (w0, w1, w2, salt_len + 3);
1559 append_0x80_3x4 (w0, w1, w2, salt_len + 4);
1561 w0[0] = swap32 (w0[0]);
1562 w0[1] = swap32 (w0[1]);
1563 w0[2] = swap32 (w0[2]);
1564 w0[3] = swap32 (w0[3]);
1565 w1[0] = swap32 (w1[0]);
1566 w1[1] = swap32 (w1[1]);
1567 w1[2] = swap32 (w1[2]);
1568 w1[3] = swap32 (w1[3]);
1569 w2[0] = swap32 (w2[0]);
1570 w2[1] = swap32 (w2[1]);
1576 w3[3] = (64 + salt_len + 4) * 8;
1580 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst);
1582 tmps[gid].dgst[i + 0] = dgst[0];
1583 tmps[gid].dgst[i + 1] = dgst[1];
1584 tmps[gid].dgst[i + 2] = dgst[2];
1585 tmps[gid].dgst[i + 3] = dgst[3];
1586 tmps[gid].dgst[i + 4] = dgst[4];
1588 tmps[gid].out[i + 0] = dgst[0];
1589 tmps[gid].out[i + 1] = dgst[1];
1590 tmps[gid].out[i + 2] = dgst[2];
1591 tmps[gid].out[i + 3] = dgst[3];
1592 tmps[gid].out[i + 4] = dgst[4];
1596 __kernel void m08800_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global androidfde_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global androidfde_t *androidfde_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1598 const u32 gid = get_global_id (0);
1600 if (gid >= gid_max) return;
1605 ipad[0] = tmps[gid].ipad[0];
1606 ipad[1] = tmps[gid].ipad[1];
1607 ipad[2] = tmps[gid].ipad[2];
1608 ipad[3] = tmps[gid].ipad[3];
1609 ipad[4] = tmps[gid].ipad[4];
1611 opad[0] = tmps[gid].opad[0];
1612 opad[1] = tmps[gid].opad[1];
1613 opad[2] = tmps[gid].opad[2];
1614 opad[3] = tmps[gid].opad[3];
1615 opad[4] = tmps[gid].opad[4];
1617 for (u32 i = 0; i < 8; i += 5)
1622 dgst[0] = tmps[gid].dgst[i + 0];
1623 dgst[1] = tmps[gid].dgst[i + 1];
1624 dgst[2] = tmps[gid].dgst[i + 2];
1625 dgst[3] = tmps[gid].dgst[i + 3];
1626 dgst[4] = tmps[gid].dgst[i + 4];
1628 out[0] = tmps[gid].out[i + 0];
1629 out[1] = tmps[gid].out[i + 1];
1630 out[2] = tmps[gid].out[i + 2];
1631 out[3] = tmps[gid].out[i + 3];
1632 out[4] = tmps[gid].out[i + 4];
1634 for (u32 j = 0; j < loop_cnt; j++)
1656 w3[3] = (64 + 20) * 8;
1658 hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst);
1667 tmps[gid].dgst[i + 0] = dgst[0];
1668 tmps[gid].dgst[i + 1] = dgst[1];
1669 tmps[gid].dgst[i + 2] = dgst[2];
1670 tmps[gid].dgst[i + 3] = dgst[3];
1671 tmps[gid].dgst[i + 4] = dgst[4];
1673 tmps[gid].out[i + 0] = out[0];
1674 tmps[gid].out[i + 1] = out[1];
1675 tmps[gid].out[i + 2] = out[2];
1676 tmps[gid].out[i + 3] = out[3];
1677 tmps[gid].out[i + 4] = out[4];
1681 __kernel void m08800_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global androidfde_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global androidfde_t *androidfde_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1687 const u32 gid = get_global_id (0);
1688 const u32 lid = get_local_id (0);
1689 const u32 lsz = get_local_size (0);
1695 __local u32 s_td0[256];
1696 __local u32 s_td1[256];
1697 __local u32 s_td2[256];
1698 __local u32 s_td3[256];
1699 __local u32 s_td4[256];
1701 __local u32 s_te0[256];
1702 __local u32 s_te1[256];
1703 __local u32 s_te2[256];
1704 __local u32 s_te3[256];
1705 __local u32 s_te4[256];
1707 for (u32 i = lid; i < 256; i += lsz)
1722 barrier (CLK_LOCAL_MEM_FENCE);
1724 if (gid >= gid_max) return;
1743 ukeyx[0] = tmps[gid].out[0];
1744 ukeyx[1] = tmps[gid].out[1];
1745 ukeyx[2] = tmps[gid].out[2];
1746 ukeyx[3] = tmps[gid].out[3];
1761 data[0] = digests_buf[digests_offset].digest_buf[0];
1762 data[1] = digests_buf[digests_offset].digest_buf[1];
1763 data[2] = digests_buf[digests_offset].digest_buf[2];
1764 data[3] = digests_buf[digests_offset].digest_buf[3];
1766 iv[0] = tmps[gid].out[4];
1767 iv[1] = tmps[gid].out[5];
1768 iv[2] = tmps[gid].out[6];
1769 iv[3] = tmps[gid].out[7];
1771 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1773 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1775 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1777 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1785 * section AES_cbc_essiv() starting
1788 // 1. start with simple sha256_transform
1792 essivhash[0] = SHA256M_A;
1793 essivhash[1] = SHA256M_B;
1794 essivhash[2] = SHA256M_C;
1795 essivhash[3] = SHA256M_D;
1796 essivhash[4] = SHA256M_E;
1797 essivhash[5] = SHA256M_F;
1798 essivhash[6] = SHA256M_G;
1799 essivhash[7] = SHA256M_H;
1823 sha256_transform (w0, w1, w2, w3, essivhash);
1828 // 2. generate essiv based on startsector -- each 512 byte is one sector
1835 ukeyx[0] = essivhash[0];
1836 ukeyx[1] = essivhash[1];
1837 ukeyx[2] = essivhash[2];
1838 ukeyx[3] = essivhash[3];
1839 ukeyx[4] = essivhash[4];
1840 ukeyx[5] = essivhash[5];
1841 ukeyx[6] = essivhash[6];
1842 ukeyx[7] = essivhash[7];
1844 AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1848 AES256_encrypt (data, essiv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1850 // 3. decrypt real data, xor essiv afterwards
1852 data[0] = androidfde_bufs[salt_pos].data[0];
1853 data[1] = androidfde_bufs[salt_pos].data[1];
1854 data[2] = androidfde_bufs[salt_pos].data[2];
1855 data[3] = androidfde_bufs[salt_pos].data[3];
1871 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1873 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1875 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1877 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1879 u32 r0 = out[0] ^ iv[0];
1880 u32 r1 = out[1] ^ iv[1];
1881 u32 r2 = out[2] ^ iv[2];
1882 u32 r3 = out[3] ^ iv[3];
1884 // rotate 3 byte (static in fat!)
1886 r0 = r1 << 8 | r0 >> 24;
1887 r1 = r2 << 8 | r1 >> 24;
1890 if ((r0 == 0x4f44534d) && (r1 == 0x302e3553))
1892 mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0);
1894 d_return_buf[lid] = 1;
1903 // 2. generate essiv based on startsector -- each 512 byte is one sector
1905 // not needed because of cbc mode -- implementation flaw !!. first 16 byte are not interessting
1912 // 3. decrypt real data
1923 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1925 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1927 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1929 for (u32 i = 4; i < 16; i += 4)
1931 data[0] = androidfde_bufs[salt_pos].data[256 + i + 0];
1932 data[1] = androidfde_bufs[salt_pos].data[256 + i + 1];
1933 data[2] = androidfde_bufs[salt_pos].data[256 + i + 2];
1934 data[3] = androidfde_bufs[salt_pos].data[256 + i + 3];
1936 iv[0] = androidfde_bufs[salt_pos].data[256 + i + 0 - 4];
1937 iv[1] = androidfde_bufs[salt_pos].data[256 + i + 1 - 4];
1938 iv[2] = androidfde_bufs[salt_pos].data[256 + i + 2 - 4];
1939 iv[3] = androidfde_bufs[salt_pos].data[256 + i + 3 - 4];
1941 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1943 r[i + 0] = out[0] ^ iv[0];
1944 r[i + 1] = out[1] ^ iv[1];
1945 r[i + 2] = out[2] ^ iv[2];
1946 r[i + 3] = out[3] ^ iv[3];
1949 // we need just a few swapped, because we do not access the others
1950 r[ 5] = swap32 (r[ 5]);
1951 r[ 6] = swap32 (r[ 6]);
1952 r[14] = swap32 (r[14]);
1954 // superblock not on id 0 or 1
1955 // assumes max block size is 32MiB
1956 // has EXT2_SUPER_MAGIC
1958 if ((r[5] < 2) && (r[6] < 16) && ((r[14] & 0xffff) == 0xEF53))
1960 mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0);
1962 d_return_buf[lid] = 1;