2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
44 __constant u32 te0[256] =
46 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
47 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
48 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
49 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
50 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
51 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
52 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
53 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
54 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
55 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
56 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
57 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
58 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
59 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
60 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
61 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
62 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
63 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
64 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
65 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
66 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
67 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
68 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
69 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
70 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
71 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
72 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
73 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
74 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
75 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
76 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
77 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
78 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
79 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
80 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
81 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
82 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
83 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
84 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
85 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
86 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
87 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
88 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
89 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
90 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
91 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
92 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
93 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
94 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
95 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
96 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
97 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
98 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
99 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
100 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
101 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
102 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
103 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
104 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
105 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
106 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
107 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
108 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
109 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
112 __constant u32 te1[256] =
114 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
115 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
116 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
117 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
118 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
119 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
120 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
121 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
122 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
123 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
124 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
125 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
126 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
127 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
128 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
129 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
130 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
131 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
132 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
133 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
134 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
135 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
136 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
137 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
138 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
139 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
140 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
141 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
142 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
143 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
144 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
145 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
146 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
147 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
148 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
149 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
150 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
151 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
152 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
153 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
154 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
155 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
156 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
157 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
158 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
159 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
160 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
161 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
162 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
163 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
164 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
165 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
166 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
167 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
168 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
169 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
170 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
171 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
172 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
173 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
174 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
175 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
176 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
177 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
180 __constant u32 te2[256] =
182 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
183 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
184 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
185 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
186 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
187 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
188 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
189 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
190 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
191 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
192 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
193 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
194 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
195 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
196 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
197 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
198 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
199 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
200 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
201 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
202 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
203 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
204 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
205 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
206 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
207 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
208 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
209 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
210 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
211 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
212 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
213 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
214 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
215 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
216 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
217 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
218 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
219 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
220 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
221 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
222 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
223 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
224 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
225 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
226 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
227 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
228 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
229 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
230 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
231 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
232 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
233 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
234 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
235 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
236 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
237 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
238 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
239 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
240 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
241 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
242 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
243 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
244 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
245 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
248 __constant u32 te3[256] =
250 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
251 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
252 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
253 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
254 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
255 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
256 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
257 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
258 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
259 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
260 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
261 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
262 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
263 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
264 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
265 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
266 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
267 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
268 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
269 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
270 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
271 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
272 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
273 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
274 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
275 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
276 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
277 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
278 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
279 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
280 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
281 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
282 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
283 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
284 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
285 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
286 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
287 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
288 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
289 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
290 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
291 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
292 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
293 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
294 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
295 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
296 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
297 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
298 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
299 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
300 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
301 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
302 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
303 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
304 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
305 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
306 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
307 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
308 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
309 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
310 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
311 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
312 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
313 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
316 __constant u32 te4[256] =
318 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
319 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
320 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
321 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
322 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
323 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
324 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
325 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
326 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
327 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
328 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
329 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
330 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
331 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
332 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
333 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
334 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
335 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
336 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
337 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
338 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
339 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
340 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
341 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
342 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
343 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
344 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
345 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
346 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
347 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
348 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
349 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
350 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
351 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
352 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
353 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
354 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
355 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
356 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
357 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
358 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
359 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
360 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
361 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
362 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
363 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
364 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
365 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
366 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
367 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
368 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
369 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
370 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
371 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
372 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
373 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
374 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
375 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
376 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
377 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
378 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
379 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
380 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
381 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
384 __constant u32 td0[256] =
386 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
387 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
388 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
389 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
390 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
391 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
392 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
393 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
394 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
395 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
396 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
397 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
398 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
399 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
400 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
401 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
402 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
403 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
404 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
405 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
406 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
407 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
408 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
409 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
410 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
411 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
412 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
413 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
414 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
415 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
416 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
417 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
418 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
419 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
420 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
421 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
422 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
423 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
424 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
425 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
426 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
427 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
428 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
429 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
430 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
431 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
432 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
433 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
434 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
435 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
436 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
437 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
438 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
439 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
440 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
441 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
442 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
443 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
444 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
445 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
446 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
447 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
448 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
449 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
452 __constant u32 td1[256] =
454 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
455 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
456 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
457 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
458 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
459 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
460 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
461 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
462 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
463 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
464 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
465 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
466 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
467 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
468 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
469 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
470 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
471 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
472 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
473 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
474 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
475 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
476 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
477 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
478 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
479 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
480 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
481 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
482 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
483 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
484 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
485 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
486 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
487 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
488 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
489 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
490 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
491 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
492 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
493 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
494 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
495 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
496 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
497 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
498 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
499 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
500 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
501 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
502 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
503 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
504 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
505 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
506 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
507 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
508 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
509 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
510 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
511 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
512 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
513 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
514 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
515 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
516 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
517 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
520 __constant u32 td2[256] =
522 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
523 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
524 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
525 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
526 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
527 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
528 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
529 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
530 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
531 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
532 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
533 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
534 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
535 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
536 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
537 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
538 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
539 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
540 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
541 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
542 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
543 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
544 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
545 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
546 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
547 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
548 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
549 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
550 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
551 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
552 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
553 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
554 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
555 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
556 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
557 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
558 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
559 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
560 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
561 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
562 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
563 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
564 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
565 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
566 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
567 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
568 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
569 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
570 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
571 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
572 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
573 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
574 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
575 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
576 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
577 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
578 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
579 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
580 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
581 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
582 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
583 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
584 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
585 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
588 __constant u32 td3[256] =
590 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
591 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
592 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
593 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
594 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
595 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
596 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
597 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
598 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
599 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
600 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
601 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
602 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
603 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
604 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
605 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
606 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
607 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
608 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
609 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
610 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
611 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
612 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
613 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
614 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
615 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
616 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
617 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
618 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
619 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
620 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
621 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
622 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
623 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
624 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
625 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
626 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
627 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
628 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
629 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
630 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
631 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
632 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
633 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
634 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
635 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
636 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
637 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
638 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
639 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
640 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
641 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
642 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
643 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
644 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
645 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
646 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
647 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
648 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
649 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
650 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
651 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
652 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
653 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
656 __constant u32 td4[256] =
658 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
659 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
660 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
661 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
662 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
663 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
664 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
665 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
666 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
667 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
668 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
669 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
670 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
671 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
672 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
673 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
674 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
675 0x86868686, 0x68686868, 0x98989898, 0x16161616,
676 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
677 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
678 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
679 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
680 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
681 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
682 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
683 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
684 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
685 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
686 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
687 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
688 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
689 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
690 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
691 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
692 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
693 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
694 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
695 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
696 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
697 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
698 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
699 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
700 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
701 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
702 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
703 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
704 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
705 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
706 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
707 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
708 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
709 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
710 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
711 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
712 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
713 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
714 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
715 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
716 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
717 0x83838383, 0x53535353, 0x99999999, 0x61616161,
718 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
719 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
720 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
721 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
724 __constant u32 rcon[] =
726 0x01000000, 0x02000000, 0x04000000, 0x08000000,
727 0x10000000, 0x20000000, 0x40000000, 0x80000000,
728 0x1b000000, 0x36000000,
731 static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
738 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
740 u32 temp = rek[j + 3];
742 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
743 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
744 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
745 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
747 rek[j + 4] = rek[j + 0]
751 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
752 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
753 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
757 static void AES128_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
759 for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
763 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
764 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
765 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
766 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
769 for (u32 i = 1, j = 4; i < 10; i += 1, j += 4)
772 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
773 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
774 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
775 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
778 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
779 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
780 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
781 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
784 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
785 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
786 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
787 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
790 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
791 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
792 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
793 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
797 static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
799 u32 s0 = in[0] ^ rek[0];
800 u32 s1 = in[1] ^ rek[1];
801 u32 s2 = in[2] ^ rek[2];
802 u32 s3 = in[3] ^ rek[3];
809 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4];
810 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5];
811 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6];
812 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7];
813 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8];
814 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9];
815 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10];
816 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11];
817 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12];
818 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13];
819 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14];
820 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15];
821 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16];
822 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17];
823 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18];
824 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19];
825 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20];
826 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21];
827 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22];
828 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23];
829 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24];
830 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25];
831 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26];
832 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27];
833 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28];
834 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29];
835 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30];
836 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31];
837 s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32];
838 s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33];
839 s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34];
840 s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35];
841 t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36];
842 t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37];
843 t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38];
844 t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39];
846 out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
847 ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
848 ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
849 ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
852 out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
853 ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
854 ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
855 ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
858 out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
859 ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
860 ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
861 ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
864 out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
865 ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
866 ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
867 ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
871 static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
873 u32 s0 = in[0] ^ rdk[0];
874 u32 s1 = in[1] ^ rdk[1];
875 u32 s2 = in[2] ^ rdk[2];
876 u32 s3 = in[3] ^ rdk[3];
883 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
884 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
885 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
886 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
887 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
888 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
889 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
890 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
891 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
892 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
893 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
894 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
895 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
896 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
897 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
898 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
899 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
900 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
901 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
902 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
903 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
904 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
905 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
906 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
907 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
908 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
909 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
910 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
911 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
912 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
913 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
914 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
915 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
916 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
917 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
918 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
920 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
921 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
922 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
923 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
926 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
927 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
928 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
929 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
932 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
933 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
934 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
935 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
938 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
939 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
940 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
941 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
945 static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5])
973 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
974 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
975 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
976 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
977 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
978 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
979 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
980 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
981 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
982 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
983 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
984 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
985 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
986 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
987 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
988 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
989 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
990 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
991 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
992 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
997 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
998 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
999 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
1000 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
1001 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
1002 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
1003 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
1004 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
1005 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
1006 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
1007 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
1008 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
1009 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
1010 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
1011 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
1012 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
1013 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
1014 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
1015 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
1016 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
1021 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
1022 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
1023 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
1024 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
1025 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
1026 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
1027 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
1028 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
1029 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
1030 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
1031 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
1032 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
1033 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
1034 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
1035 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
1036 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
1037 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
1038 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
1039 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
1040 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
1045 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
1046 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
1047 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
1048 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
1049 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
1050 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
1051 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
1052 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
1053 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
1054 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
1055 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
1056 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
1057 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
1058 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
1059 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
1060 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
1061 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
1062 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
1063 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
1064 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
1073 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2010_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2010_t *office2010_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1079 const u32 gid = get_global_id (0);
1081 if (gid >= gid_max) return;
1085 w0[0] = pws[gid].i[ 0];
1086 w0[1] = pws[gid].i[ 1];
1087 w0[2] = pws[gid].i[ 2];
1088 w0[3] = pws[gid].i[ 3];
1092 w1[0] = pws[gid].i[ 4];
1093 w1[1] = pws[gid].i[ 5];
1094 w1[2] = pws[gid].i[ 6];
1095 w1[3] = pws[gid].i[ 7];
1099 w2[0] = pws[gid].i[ 8];
1100 w2[1] = pws[gid].i[ 9];
1101 w2[2] = pws[gid].i[10];
1102 w2[3] = pws[gid].i[11];
1106 w3[0] = pws[gid].i[12];
1107 w3[1] = pws[gid].i[13];
1108 w3[2] = pws[gid].i[14];
1109 w3[3] = pws[gid].i[15];
1111 u32 pw_len = pws[gid].pw_len;
1113 append_0x80_4 (w0, w1, w2, w3, pw_len);
1115 make_unicode (w1, w2, w3);
1116 make_unicode (w0, w0, w1);
1122 u32 salt_len = salt_bufs[salt_pos].salt_len;
1126 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1127 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1128 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
1129 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
1137 t0[0] = salt_buf[0];
1138 t0[1] = salt_buf[1];
1139 t0[2] = salt_buf[2];
1140 t0[3] = salt_buf[3];
1144 t1[0] = swap_workaround (w0[0]);
1145 t1[1] = swap_workaround (w0[1]);
1146 t1[2] = swap_workaround (w0[2]);
1147 t1[3] = swap_workaround (w0[3]);
1151 t2[0] = swap_workaround (w1[0]);
1152 t2[1] = swap_workaround (w1[1]);
1153 t2[2] = swap_workaround (w1[2]);
1154 t2[3] = swap_workaround (w1[3]);
1158 t3[0] = swap_workaround (w2[0]);
1159 t3[1] = swap_workaround (w2[1]);
1161 t3[3] = (salt_len + (pw_len * 2)) * 8;
1165 digest[0] = SHA1M_A;
1166 digest[1] = SHA1M_B;
1167 digest[2] = SHA1M_C;
1168 digest[3] = SHA1M_D;
1169 digest[4] = SHA1M_E;
1171 sha1_transform (t0, t1, t2, t3, digest);
1173 tmps[gid].out[0] = digest[0];
1174 tmps[gid].out[1] = digest[1];
1175 tmps[gid].out[2] = digest[2];
1176 tmps[gid].out[3] = digest[3];
1177 tmps[gid].out[4] = digest[4];
1180 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2010_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2010_t *office2010_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1182 const u32 gid = get_global_id (0);
1184 if (gid >= gid_max) return;
1189 w0[1] = tmps[gid].out[0];
1190 w0[2] = tmps[gid].out[1];
1191 w0[3] = tmps[gid].out[2];
1195 w1[0] = tmps[gid].out[3];
1196 w1[1] = tmps[gid].out[4];
1212 w3[3] = (4 + 20) * 8;
1214 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1216 w0[0] = swap_workaround (j);
1220 digest[0] = SHA1M_A;
1221 digest[1] = SHA1M_B;
1222 digest[2] = SHA1M_C;
1223 digest[3] = SHA1M_D;
1224 digest[4] = SHA1M_E;
1226 sha1_transform (w0, w1, w2, w3, digest);
1235 tmps[gid].out[0] = w0[1];
1236 tmps[gid].out[1] = w0[2];
1237 tmps[gid].out[2] = w0[3];
1238 tmps[gid].out[3] = w1[0];
1239 tmps[gid].out[4] = w1[1];
1242 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global office2010_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global office2010_t *office2010_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1244 const u32 gid = get_global_id (0);
1245 const u32 lid = get_local_id (0);
1251 const u32 lid4 = lid * 4;
1253 __local u32 s_td0[256];
1254 __local u32 s_td1[256];
1255 __local u32 s_td2[256];
1256 __local u32 s_td3[256];
1257 __local u32 s_td4[256];
1259 __local u32 s_te0[256];
1260 __local u32 s_te1[256];
1261 __local u32 s_te2[256];
1262 __local u32 s_te3[256];
1263 __local u32 s_te4[256];
1265 s_td0[lid4 + 0] = td0[lid4 + 0];
1266 s_td0[lid4 + 1] = td0[lid4 + 1];
1267 s_td0[lid4 + 2] = td0[lid4 + 2];
1268 s_td0[lid4 + 3] = td0[lid4 + 3];
1270 s_td1[lid4 + 0] = td1[lid4 + 0];
1271 s_td1[lid4 + 1] = td1[lid4 + 1];
1272 s_td1[lid4 + 2] = td1[lid4 + 2];
1273 s_td1[lid4 + 3] = td1[lid4 + 3];
1275 s_td2[lid4 + 0] = td2[lid4 + 0];
1276 s_td2[lid4 + 1] = td2[lid4 + 1];
1277 s_td2[lid4 + 2] = td2[lid4 + 2];
1278 s_td2[lid4 + 3] = td2[lid4 + 3];
1280 s_td3[lid4 + 0] = td3[lid4 + 0];
1281 s_td3[lid4 + 1] = td3[lid4 + 1];
1282 s_td3[lid4 + 2] = td3[lid4 + 2];
1283 s_td3[lid4 + 3] = td3[lid4 + 3];
1285 s_td4[lid4 + 0] = td4[lid4 + 0];
1286 s_td4[lid4 + 1] = td4[lid4 + 1];
1287 s_td4[lid4 + 2] = td4[lid4 + 2];
1288 s_td4[lid4 + 3] = td4[lid4 + 3];
1290 s_te0[lid4 + 0] = te0[lid4 + 0];
1291 s_te0[lid4 + 1] = te0[lid4 + 1];
1292 s_te0[lid4 + 2] = te0[lid4 + 2];
1293 s_te0[lid4 + 3] = te0[lid4 + 3];
1295 s_te1[lid4 + 0] = te1[lid4 + 0];
1296 s_te1[lid4 + 1] = te1[lid4 + 1];
1297 s_te1[lid4 + 2] = te1[lid4 + 2];
1298 s_te1[lid4 + 3] = te1[lid4 + 3];
1300 s_te2[lid4 + 0] = te2[lid4 + 0];
1301 s_te2[lid4 + 1] = te2[lid4 + 1];
1302 s_te2[lid4 + 2] = te2[lid4 + 2];
1303 s_te2[lid4 + 3] = te2[lid4 + 3];
1305 s_te3[lid4 + 0] = te3[lid4 + 0];
1306 s_te3[lid4 + 1] = te3[lid4 + 1];
1307 s_te3[lid4 + 2] = te3[lid4 + 2];
1308 s_te3[lid4 + 3] = te3[lid4 + 3];
1310 s_te4[lid4 + 0] = te4[lid4 + 0];
1311 s_te4[lid4 + 1] = te4[lid4 + 1];
1312 s_te4[lid4 + 2] = te4[lid4 + 2];
1313 s_te4[lid4 + 3] = te4[lid4 + 3];
1315 barrier (CLK_LOCAL_MEM_FENCE);
1317 if (gid >= gid_max) return;
1319 u32x encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 };
1320 u32x encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e };
1327 w0[0] = tmps[gid].out[0];
1328 w0[1] = tmps[gid].out[1];
1329 w0[2] = tmps[gid].out[2];
1330 w0[3] = tmps[gid].out[3];
1331 w1[0] = tmps[gid].out[4];
1332 w1[1] = encryptedVerifierHashInputBlockKey[0];
1333 w1[2] = encryptedVerifierHashInputBlockKey[1];
1342 w3[3] = (20 + 8) * 8;
1346 digest0[0] = SHA1M_A;
1347 digest0[1] = SHA1M_B;
1348 digest0[2] = SHA1M_C;
1349 digest0[3] = SHA1M_D;
1350 digest0[4] = SHA1M_E;
1352 sha1_transform (w0, w1, w2, w3, digest0);
1354 w0[0] = tmps[gid].out[0];
1355 w0[1] = tmps[gid].out[1];
1356 w0[2] = tmps[gid].out[2];
1357 w0[3] = tmps[gid].out[3];
1358 w1[0] = tmps[gid].out[4];
1359 w1[1] = encryptedVerifierHashValueBlockKey[0];
1360 w1[2] = encryptedVerifierHashValueBlockKey[1];
1369 w3[3] = (20 + 8) * 8;
1373 digest1[0] = SHA1M_A;
1374 digest1[1] = SHA1M_B;
1375 digest1[2] = SHA1M_C;
1376 digest1[3] = SHA1M_D;
1377 digest1[4] = SHA1M_E;
1379 sha1_transform (w0, w1, w2, w3, digest1);
1381 // now we got the AES key, decrypt the verifier
1388 data[0] = office2010_bufs[salt_pos].encryptedVerifier[0];
1389 data[1] = office2010_bufs[salt_pos].encryptedVerifier[1];
1390 data[2] = office2010_bufs[salt_pos].encryptedVerifier[2];
1391 data[3] = office2010_bufs[salt_pos].encryptedVerifier[3];
1395 ukeyx[0] = digest0[0];
1396 ukeyx[1] = digest0[1];
1397 ukeyx[2] = digest0[2];
1398 ukeyx[3] = digest0[3];
1400 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1402 for (u32 i = 0; i < 44; i++) rdk[i] = rek[i];
1404 AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1408 AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4);
1410 out[0] ^= salt_bufs[salt_pos].salt_buf[0];
1411 out[1] ^= salt_bufs[salt_pos].salt_buf[1];
1412 out[2] ^= salt_bufs[salt_pos].salt_buf[2];
1413 out[3] ^= salt_bufs[salt_pos].salt_buf[3];
1415 // do a sha1 of the result
1436 digest[0] = SHA1M_A;
1437 digest[1] = SHA1M_B;
1438 digest[2] = SHA1M_C;
1439 digest[3] = SHA1M_D;
1440 digest[4] = SHA1M_E;
1442 sha1_transform (w0, w1, w2, w3, digest);
1444 // encrypt it again for verify
1446 ukeyx[0] = digest1[0];
1447 ukeyx[1] = digest1[1];
1448 ukeyx[2] = digest1[2];
1449 ukeyx[3] = digest1[3];
1451 AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1453 data[0] = digest[0] ^ salt_bufs[salt_pos].salt_buf[0];
1454 data[1] = digest[1] ^ salt_bufs[salt_pos].salt_buf[1];
1455 data[2] = digest[2] ^ salt_bufs[salt_pos].salt_buf[2];
1456 data[3] = digest[3] ^ salt_bufs[salt_pos].salt_buf[3];
1458 AES128_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
1460 const u32x r0 = out[0];
1461 const u32x r1 = out[1];
1462 const u32x r2 = out[2];
1463 const u32x r3 = out[3];
1467 #include VECT_COMPARE_M