2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
44 __constant u32 te0[256] =
46 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
47 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
48 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
49 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
50 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
51 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
52 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
53 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
54 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
55 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
56 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
57 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
58 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
59 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
60 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
61 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
62 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
63 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
64 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
65 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
66 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
67 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
68 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
69 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
70 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
71 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
72 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
73 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
74 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
75 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
76 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
77 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
78 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
79 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
80 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
81 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
82 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
83 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
84 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
85 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
86 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
87 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
88 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
89 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
90 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
91 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
92 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
93 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
94 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
95 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
96 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
97 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
98 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
99 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
100 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
101 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
102 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
103 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
104 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
105 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
106 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
107 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
108 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
109 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
112 __constant u32 te1[256] =
114 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
115 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
116 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
117 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
118 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
119 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
120 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
121 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
122 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
123 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
124 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
125 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
126 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
127 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
128 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
129 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
130 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
131 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
132 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
133 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
134 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
135 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
136 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
137 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
138 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
139 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
140 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
141 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
142 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
143 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
144 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
145 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
146 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
147 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
148 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
149 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
150 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
151 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
152 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
153 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
154 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
155 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
156 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
157 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
158 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
159 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
160 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
161 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
162 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
163 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
164 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
165 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
166 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
167 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
168 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
169 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
170 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
171 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
172 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
173 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
174 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
175 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
176 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
177 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
180 __constant u32 te2[256] =
182 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
183 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
184 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
185 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
186 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
187 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
188 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
189 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
190 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
191 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
192 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
193 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
194 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
195 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
196 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
197 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
198 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
199 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
200 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
201 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
202 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
203 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
204 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
205 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
206 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
207 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
208 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
209 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
210 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
211 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
212 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
213 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
214 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
215 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
216 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
217 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
218 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
219 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
220 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
221 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
222 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
223 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
224 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
225 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
226 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
227 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
228 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
229 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
230 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
231 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
232 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
233 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
234 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
235 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
236 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
237 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
238 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
239 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
240 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
241 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
242 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
243 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
244 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
245 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
248 __constant u32 te3[256] =
250 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
251 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
252 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
253 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
254 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
255 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
256 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
257 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
258 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
259 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
260 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
261 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
262 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
263 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
264 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
265 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
266 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
267 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
268 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
269 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
270 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
271 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
272 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
273 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
274 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
275 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
276 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
277 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
278 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
279 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
280 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
281 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
282 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
283 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
284 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
285 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
286 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
287 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
288 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
289 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
290 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
291 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
292 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
293 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
294 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
295 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
296 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
297 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
298 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
299 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
300 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
301 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
302 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
303 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
304 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
305 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
306 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
307 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
308 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
309 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
310 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
311 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
312 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
313 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
316 __constant u32 te4[256] =
318 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
319 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
320 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
321 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
322 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
323 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
324 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
325 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
326 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
327 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
328 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
329 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
330 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
331 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
332 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
333 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
334 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
335 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
336 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
337 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
338 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
339 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
340 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
341 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
342 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
343 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
344 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
345 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
346 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
347 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
348 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
349 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
350 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
351 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
352 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
353 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
354 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
355 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
356 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
357 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
358 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
359 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
360 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
361 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
362 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
363 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
364 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
365 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
366 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
367 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
368 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
369 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
370 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
371 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
372 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
373 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
374 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
375 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
376 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
377 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
378 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
379 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
380 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
381 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
384 __constant u32 td0[256] =
386 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
387 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
388 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
389 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
390 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
391 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
392 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
393 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
394 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
395 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
396 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
397 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
398 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
399 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
400 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
401 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
402 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
403 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
404 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
405 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
406 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
407 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
408 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
409 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
410 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
411 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
412 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
413 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
414 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
415 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
416 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
417 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
418 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
419 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
420 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
421 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
422 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
423 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
424 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
425 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
426 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
427 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
428 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
429 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
430 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
431 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
432 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
433 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
434 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
435 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
436 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
437 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
438 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
439 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
440 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
441 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
442 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
443 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
444 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
445 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
446 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
447 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
448 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
449 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
452 __constant u32 td1[256] =
454 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
455 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
456 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
457 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
458 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
459 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
460 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
461 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
462 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
463 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
464 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
465 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
466 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
467 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
468 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
469 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
470 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
471 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
472 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
473 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
474 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
475 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
476 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
477 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
478 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
479 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
480 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
481 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
482 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
483 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
484 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
485 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
486 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
487 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
488 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
489 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
490 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
491 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
492 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
493 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
494 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
495 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
496 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
497 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
498 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
499 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
500 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
501 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
502 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
503 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
504 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
505 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
506 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
507 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
508 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
509 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
510 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
511 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
512 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
513 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
514 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
515 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
516 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
517 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
520 __constant u32 td2[256] =
522 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
523 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
524 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
525 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
526 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
527 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
528 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
529 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
530 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
531 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
532 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
533 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
534 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
535 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
536 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
537 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
538 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
539 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
540 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
541 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
542 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
543 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
544 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
545 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
546 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
547 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
548 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
549 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
550 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
551 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
552 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
553 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
554 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
555 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
556 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
557 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
558 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
559 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
560 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
561 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
562 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
563 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
564 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
565 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
566 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
567 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
568 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
569 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
570 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
571 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
572 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
573 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
574 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
575 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
576 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
577 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
578 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
579 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
580 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
581 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
582 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
583 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
584 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
585 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
588 __constant u32 td3[256] =
590 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
591 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
592 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
593 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
594 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
595 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
596 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
597 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
598 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
599 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
600 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
601 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
602 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
603 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
604 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
605 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
606 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
607 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
608 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
609 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
610 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
611 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
612 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
613 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
614 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
615 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
616 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
617 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
618 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
619 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
620 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
621 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
622 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
623 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
624 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
625 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
626 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
627 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
628 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
629 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
630 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
631 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
632 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
633 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
634 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
635 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
636 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
637 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
638 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
639 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
640 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
641 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
642 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
643 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
644 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
645 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
646 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
647 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
648 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
649 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
650 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
651 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
652 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
653 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
656 __constant u32 td4[256] =
658 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
659 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
660 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
661 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
662 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
663 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
664 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
665 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
666 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
667 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
668 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
669 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
670 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
671 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
672 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
673 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
674 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
675 0x86868686, 0x68686868, 0x98989898, 0x16161616,
676 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
677 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
678 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
679 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
680 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
681 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
682 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
683 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
684 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
685 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
686 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
687 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
688 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
689 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
690 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
691 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
692 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
693 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
694 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
695 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
696 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
697 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
698 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
699 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
700 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
701 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
702 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
703 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
704 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
705 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
706 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
707 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
708 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
709 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
710 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
711 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
712 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
713 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
714 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
715 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
716 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
717 0x83838383, 0x53535353, 0x99999999, 0x61616161,
718 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
719 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
720 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
721 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
724 __constant u32 rcon[] =
726 0x01000000, 0x02000000, 0x04000000, 0x08000000,
727 0x10000000, 0x20000000, 0x40000000, 0x80000000,
728 0x1b000000, 0x36000000,
731 static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
738 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
740 u32 temp = rek[j + 3];
742 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
743 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
744 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
745 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
747 rek[j + 4] = rek[j + 0]
751 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
752 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
753 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
757 static void AES128_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
759 for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
763 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
764 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
765 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
766 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
769 for (u32 i = 1, j = 4; i < 10; i += 1, j += 4)
772 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
773 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
774 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
775 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
778 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
779 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
780 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
781 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
784 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
785 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
786 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
787 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
790 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
791 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
792 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
793 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
797 static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
799 u32 s0 = in[0] ^ rek[0];
800 u32 s1 = in[1] ^ rek[1];
801 u32 s2 = in[2] ^ rek[2];
802 u32 s3 = in[3] ^ rek[3];
809 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
810 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
811 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
812 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
813 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
814 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
815 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
816 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
817 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
818 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
819 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
820 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
821 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
822 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
823 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
824 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
825 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
826 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
827 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
828 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
829 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
830 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
831 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
832 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
833 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
834 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
835 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
836 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
837 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
838 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
839 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
840 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
841 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
842 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
843 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
844 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
846 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
847 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
848 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
849 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
852 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
853 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
854 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
855 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
858 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
859 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
860 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
861 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
864 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
865 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
866 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
867 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
871 static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
873 u32 s0 = in[0] ^ rdk[0];
874 u32 s1 = in[1] ^ rdk[1];
875 u32 s2 = in[2] ^ rdk[2];
876 u32 s3 = in[3] ^ rdk[3];
883 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
884 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
885 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
886 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
887 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
888 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
889 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
890 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
891 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
892 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
893 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
894 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
895 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
896 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
897 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
898 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
899 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
900 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
901 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
902 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
903 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
904 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
905 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
906 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
907 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
908 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
909 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
910 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
911 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
912 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
913 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
914 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
915 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
916 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
917 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
918 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
920 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
921 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
922 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
923 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
926 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
927 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
928 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
929 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
932 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
933 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
934 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
935 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
938 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
939 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
940 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
941 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
945 static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
964 u32 temp = rek[j + 7];
966 rek[j + 8] = rek[j + 0]
967 ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000)
968 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
969 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
970 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff)
973 rek[j + 9] = rek[j + 1] ^ rek[j + 8];
974 rek[j + 10] = rek[j + 2] ^ rek[j + 9];
975 rek[j + 11] = rek[j + 3] ^ rek[j + 10];
981 rek[j + 12] = rek[j + 4]
982 ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000)
983 ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000)
984 ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00)
985 ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff);
987 rek[j + 13] = rek[j + 5] ^ rek[j + 12];
988 rek[j + 14] = rek[j + 6] ^ rek[j + 13];
989 rek[j + 15] = rek[j + 7] ^ rek[j + 14];
995 static void AES256_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
997 for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
1001 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
1002 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
1003 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
1004 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
1007 for (u32 i = 1, j = 4; i < 14; i += 1, j += 4)
1010 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
1011 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
1012 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
1013 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
1016 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
1017 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
1018 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
1019 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
1022 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
1023 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
1024 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
1025 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
1028 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
1029 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
1030 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
1031 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
1035 static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
1037 u32 s0 = in[0] ^ rdk[0];
1038 u32 s1 = in[1] ^ rdk[1];
1039 u32 s2 = in[2] ^ rdk[2];
1040 u32 s3 = in[3] ^ rdk[3];
1047 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
1048 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
1049 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
1050 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
1051 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
1052 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
1053 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
1054 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
1055 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
1056 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
1057 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
1058 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
1059 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
1060 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
1061 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
1062 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
1063 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
1064 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
1065 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
1066 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
1067 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
1068 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
1069 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
1070 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
1071 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
1072 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
1073 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
1074 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
1075 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
1076 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
1077 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
1078 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
1079 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
1080 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
1081 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
1082 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
1083 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40];
1084 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41];
1085 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42];
1086 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43];
1087 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44];
1088 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45];
1089 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46];
1090 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47];
1091 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48];
1092 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49];
1093 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50];
1094 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51];
1095 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52];
1096 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53];
1097 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54];
1098 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55];
1100 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
1101 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
1102 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
1103 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
1106 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
1107 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
1108 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
1109 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
1112 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
1113 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
1114 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
1115 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
1118 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
1119 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
1120 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
1121 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
1125 static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
1127 u32 s0 = in[0] ^ rek[0];
1128 u32 s1 = in[1] ^ rek[1];
1129 u32 s2 = in[2] ^ rek[2];
1130 u32 s3 = in[3] ^ rek[3];
1137 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
1138 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
1139 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
1140 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
1141 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
1142 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
1143 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
1144 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
1145 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
1146 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
1147 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
1148 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
1149 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
1150 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
1151 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
1152 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
1153 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
1154 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
1155 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
1156 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
1157 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
1158 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
1159 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
1160 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
1161 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
1162 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
1163 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
1164 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
1165 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
1166 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
1167 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
1168 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
1169 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
1170 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
1171 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
1172 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
1173 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40];
1174 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41];
1175 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42];
1176 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43];
1177 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44];
1178 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45];
1179 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46];
1180 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47];
1181 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48];
1182 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49];
1183 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50];
1184 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51];
1185 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52];
1186 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53];
1187 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54];
1188 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55];
1190 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
1191 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
1192 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
1193 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
1196 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
1197 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
1198 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
1199 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
1202 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
1203 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
1204 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
1205 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
1208 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
1209 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
1210 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
1211 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
1215 static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5])
1243 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
1244 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
1245 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
1246 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
1247 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
1248 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
1249 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
1250 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
1251 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
1252 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
1253 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
1254 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
1255 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
1256 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
1257 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
1258 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
1259 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
1260 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
1261 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
1262 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
1267 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
1268 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
1269 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
1270 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
1271 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
1272 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
1273 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
1274 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
1275 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
1276 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
1277 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
1278 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
1279 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
1280 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
1281 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
1282 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
1283 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
1284 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
1285 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
1286 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
1291 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
1292 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
1293 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
1294 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
1295 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
1296 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
1297 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
1298 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
1299 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
1300 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
1301 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
1302 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
1303 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
1304 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
1305 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
1306 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
1307 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
1308 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
1309 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
1310 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
1315 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
1316 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
1317 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
1318 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
1319 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
1320 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
1321 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
1322 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
1323 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
1324 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
1325 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
1326 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
1327 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
1328 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
1329 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
1330 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
1331 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
1332 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
1333 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
1334 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
1343 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2007_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2007_t *office2007_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1349 const u32 gid = get_global_id (0);
1351 if (gid >= gid_max) return;
1355 w0[0] = pws[gid].i[ 0];
1356 w0[1] = pws[gid].i[ 1];
1357 w0[2] = pws[gid].i[ 2];
1358 w0[3] = pws[gid].i[ 3];
1362 w1[0] = pws[gid].i[ 4];
1363 w1[1] = pws[gid].i[ 5];
1364 w1[2] = pws[gid].i[ 6];
1365 w1[3] = pws[gid].i[ 7];
1369 w2[0] = pws[gid].i[ 8];
1370 w2[1] = pws[gid].i[ 9];
1371 w2[2] = pws[gid].i[10];
1372 w2[3] = pws[gid].i[11];
1376 w3[0] = pws[gid].i[12];
1377 w3[1] = pws[gid].i[13];
1378 w3[2] = pws[gid].i[14];
1379 w3[3] = pws[gid].i[15];
1381 u32 pw_len = pws[gid].pw_len;
1383 append_0x80_4 (w0, w1, w2, w3, pw_len);
1385 make_unicode (w1, w2, w3);
1386 make_unicode (w0, w0, w1);
1392 u32 salt_len = salt_bufs[salt_pos].salt_len;
1396 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1397 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1398 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1399 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1407 t0[0] = salt_buf[0];
1408 t0[1] = salt_buf[1];
1409 t0[2] = salt_buf[2];
1410 t0[3] = salt_buf[3];
1414 t1[0] = swap_workaround (w0[0]);
1415 t1[1] = swap_workaround (w0[1]);
1416 t1[2] = swap_workaround (w0[2]);
1417 t1[3] = swap_workaround (w0[3]);
1421 t2[0] = swap_workaround (w1[0]);
1422 t2[1] = swap_workaround (w1[1]);
1423 t2[2] = swap_workaround (w1[2]);
1424 t2[3] = swap_workaround (w1[3]);
1428 t3[0] = swap_workaround (w2[0]);
1429 t3[1] = swap_workaround (w2[1]);
1431 t3[3] = (salt_len + (pw_len * 2)) * 8;
1435 digest[0] = SHA1M_A;
1436 digest[1] = SHA1M_B;
1437 digest[2] = SHA1M_C;
1438 digest[3] = SHA1M_D;
1439 digest[4] = SHA1M_E;
1441 sha1_transform (t0, t1, t2, t3, digest);
1443 tmps[gid].out[0] = digest[0];
1444 tmps[gid].out[1] = digest[1];
1445 tmps[gid].out[2] = digest[2];
1446 tmps[gid].out[3] = digest[3];
1447 tmps[gid].out[4] = digest[4];
1450 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2007_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2007_t *office2007_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1452 const u32 gid = get_global_id (0);
1454 if (gid >= gid_max) return;
1459 w0[1] = tmps[gid].out[0];
1460 w0[2] = tmps[gid].out[1];
1461 w0[3] = tmps[gid].out[2];
1465 w1[0] = tmps[gid].out[3];
1466 w1[1] = tmps[gid].out[4];
1482 w3[3] = (4 + 20) * 8;
1484 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1486 w0[0] = swap_workaround (j);
1490 digest[0] = SHA1M_A;
1491 digest[1] = SHA1M_B;
1492 digest[2] = SHA1M_C;
1493 digest[3] = SHA1M_D;
1494 digest[4] = SHA1M_E;
1496 sha1_transform (w0, w1, w2, w3, digest);
1505 tmps[gid].out[0] = w0[1];
1506 tmps[gid].out[1] = w0[2];
1507 tmps[gid].out[2] = w0[3];
1508 tmps[gid].out[3] = w1[0];
1509 tmps[gid].out[4] = w1[1];
1512 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2007_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2007_t *office2007_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1514 const u32 gid = get_global_id (0);
1515 const u32 lid = get_local_id (0);
1521 const u32 lid4 = lid * 4;
1523 __local u32 s_td0[256];
1524 __local u32 s_td1[256];
1525 __local u32 s_td2[256];
1526 __local u32 s_td3[256];
1527 __local u32 s_td4[256];
1529 __local u32 s_te0[256];
1530 __local u32 s_te1[256];
1531 __local u32 s_te2[256];
1532 __local u32 s_te3[256];
1533 __local u32 s_te4[256];
1535 s_td0[lid4 + 0] = td0[lid4 + 0];
1536 s_td0[lid4 + 1] = td0[lid4 + 1];
1537 s_td0[lid4 + 2] = td0[lid4 + 2];
1538 s_td0[lid4 + 3] = td0[lid4 + 3];
1540 s_td1[lid4 + 0] = td1[lid4 + 0];
1541 s_td1[lid4 + 1] = td1[lid4 + 1];
1542 s_td1[lid4 + 2] = td1[lid4 + 2];
1543 s_td1[lid4 + 3] = td1[lid4 + 3];
1545 s_td2[lid4 + 0] = td2[lid4 + 0];
1546 s_td2[lid4 + 1] = td2[lid4 + 1];
1547 s_td2[lid4 + 2] = td2[lid4 + 2];
1548 s_td2[lid4 + 3] = td2[lid4 + 3];
1550 s_td3[lid4 + 0] = td3[lid4 + 0];
1551 s_td3[lid4 + 1] = td3[lid4 + 1];
1552 s_td3[lid4 + 2] = td3[lid4 + 2];
1553 s_td3[lid4 + 3] = td3[lid4 + 3];
1555 s_td4[lid4 + 0] = td4[lid4 + 0];
1556 s_td4[lid4 + 1] = td4[lid4 + 1];
1557 s_td4[lid4 + 2] = td4[lid4 + 2];
1558 s_td4[lid4 + 3] = td4[lid4 + 3];
1560 s_te0[lid4 + 0] = te0[lid4 + 0];
1561 s_te0[lid4 + 1] = te0[lid4 + 1];
1562 s_te0[lid4 + 2] = te0[lid4 + 2];
1563 s_te0[lid4 + 3] = te0[lid4 + 3];
1565 s_te1[lid4 + 0] = te1[lid4 + 0];
1566 s_te1[lid4 + 1] = te1[lid4 + 1];
1567 s_te1[lid4 + 2] = te1[lid4 + 2];
1568 s_te1[lid4 + 3] = te1[lid4 + 3];
1570 s_te2[lid4 + 0] = te2[lid4 + 0];
1571 s_te2[lid4 + 1] = te2[lid4 + 1];
1572 s_te2[lid4 + 2] = te2[lid4 + 2];
1573 s_te2[lid4 + 3] = te2[lid4 + 3];
1575 s_te3[lid4 + 0] = te3[lid4 + 0];
1576 s_te3[lid4 + 1] = te3[lid4 + 1];
1577 s_te3[lid4 + 2] = te3[lid4 + 2];
1578 s_te3[lid4 + 3] = te3[lid4 + 3];
1580 s_te4[lid4 + 0] = te4[lid4 + 0];
1581 s_te4[lid4 + 1] = te4[lid4 + 1];
1582 s_te4[lid4 + 2] = te4[lid4 + 2];
1583 s_te4[lid4 + 3] = te4[lid4 + 3];
1585 barrier (CLK_LOCAL_MEM_FENCE);
1587 if (gid >= gid_max) return;
1591 w0[0] = tmps[gid].out[0];
1592 w0[1] = tmps[gid].out[1];
1593 w0[2] = tmps[gid].out[2];
1594 w0[3] = tmps[gid].out[3];
1598 w1[0] = tmps[gid].out[4];
1615 w3[3] = (20 + 4) * 8;
1617 u32 digest_common[5];
1619 digest_common[0] = SHA1M_A;
1620 digest_common[1] = SHA1M_B;
1621 digest_common[2] = SHA1M_C;
1622 digest_common[3] = SHA1M_D;
1623 digest_common[4] = SHA1M_E;
1625 sha1_transform (w0, w1, w2, w3, digest_common);
1627 w0[0] = 0x36363636 ^ digest_common[0];
1628 w0[1] = 0x36363636 ^ digest_common[1];
1629 w0[2] = 0x36363636 ^ digest_common[2];
1630 w0[3] = 0x36363636 ^ digest_common[3];
1631 w1[0] = 0x36363636 ^ digest_common[4];
1646 digest[0] = SHA1M_A;
1647 digest[1] = SHA1M_B;
1648 digest[2] = SHA1M_C;
1649 digest[3] = SHA1M_D;
1650 digest[4] = SHA1M_E;
1652 sha1_transform (w0, w1, w2, w3, digest);
1671 sha1_transform (w0, w1, w2, w3, digest);
1673 // save digest for the AES-256 variant
1675 u32 digest_saved[5];
1677 digest_saved[0] = digest[0];
1678 digest_saved[1] = digest[1];
1679 digest_saved[2] = digest[2];
1680 digest_saved[3] = digest[3];
1681 digest_saved[4] = digest[4];
1683 // now we got the AES key, decrypt the verifier
1690 verifier[0] = office2007_bufs[salt_pos].encryptedVerifier[0];
1691 verifier[1] = office2007_bufs[salt_pos].encryptedVerifier[1];
1692 verifier[2] = office2007_bufs[salt_pos].encryptedVerifier[2];
1693 verifier[3] = office2007_bufs[salt_pos].encryptedVerifier[3];
1697 data[0] = verifier[0];
1698 data[1] = verifier[1];
1699 data[2] = verifier[2];
1700 data[3] = verifier[3];
1709 ukeyx[0] = digest[0];
1710 ukeyx[1] = digest[1];
1711 ukeyx[2] = digest[2];
1712 ukeyx[3] = digest[3];
1714 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1716 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1718 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1722 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1724 // do a sha1 of the result
1743 digest[0] = SHA1M_A;
1744 digest[1] = SHA1M_B;
1745 digest[2] = SHA1M_C;
1746 digest[3] = SHA1M_D;
1747 digest[4] = SHA1M_E;
1749 sha1_transform (w0, w1, w2, w3, digest);
1751 // encrypt it again for verify
1753 data[0] = digest[0];
1754 data[1] = digest[1];
1755 data[2] = digest[2];
1756 data[3] = digest[3];
1758 AES128_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1761 const u32x r0 = out[0];
1762 const u32x r1 = out[1];
1763 const u32x r2 = out[2];
1764 const u32x r3 = out[3];
1768 #include VECT_COMPARE_M
1775 // try same procedure but with AES-256
1777 w0[0] = 0x5c5c5c5c ^ digest_common[0];
1778 w0[1] = 0x5c5c5c5c ^ digest_common[1];
1779 w0[2] = 0x5c5c5c5c ^ digest_common[2];
1780 w0[3] = 0x5c5c5c5c ^ digest_common[3];
1781 w1[0] = 0x5c5c5c5c ^ digest_common[4];
1794 digest[0] = SHA1M_A;
1795 digest[1] = SHA1M_B;
1796 digest[2] = SHA1M_C;
1797 digest[3] = SHA1M_D;
1798 digest[4] = SHA1M_E;
1800 sha1_transform (w0, w1, w2, w3, digest);
1819 sha1_transform (w0, w1, w2, w3, digest);
1821 // now we got the AES key, decrypt the verifier
1823 ukeyx[0] = digest_saved[0];
1824 ukeyx[1] = digest_saved[1];
1825 ukeyx[2] = digest_saved[2];
1826 ukeyx[3] = digest_saved[3];
1827 ukeyx[4] = digest_saved[4];
1828 ukeyx[5] = digest[0];
1829 ukeyx[6] = digest[1];
1830 ukeyx[7] = digest[2];
1832 AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1834 for (u32 i = 0; i < 60; i++) rdk[i] = rek[i];
1836 AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1838 data[0] = verifier[0];
1839 data[1] = verifier[1];
1840 data[2] = verifier[2];
1841 data[3] = verifier[3];
1843 AES256_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1845 // do a sha1 of the result
1864 digest[0] = SHA1M_A;
1865 digest[1] = SHA1M_B;
1866 digest[2] = SHA1M_C;
1867 digest[3] = SHA1M_D;
1868 digest[4] = SHA1M_E;
1870 sha1_transform (w0, w1, w2, w3, digest);
1872 // encrypt it again for verify
1874 data[0] = digest[0];
1875 data[1] = digest[1];
1876 data[2] = digest[2];
1877 data[3] = digest[3];
1879 AES256_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1882 const u32x r0 = out[0];
1883 const u32x r1 = out[1];
1884 const u32x r2 = out[2];
1885 const u32x r3 = out[3];
1889 #include VECT_COMPARE_M