2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
30 #include "types_amd.c"
31 #include "common_amd.c"
34 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define ROUNDS 0x40000
39 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
40 #define GETCHAR(a,p) ((u8 *)(a))[(p)]
42 #define PUTCHAR_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
43 #define GETCHAR_BE(a,p) ((u8 *)(a))[(p) ^ 3]
45 __constant u32 te0[256] =
47 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
48 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
49 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
50 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
51 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
52 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
53 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
54 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
55 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
56 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
57 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
58 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
59 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
60 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
61 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
62 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
63 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
64 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
65 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
66 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
67 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
68 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
69 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
70 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
71 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
72 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
73 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
74 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
75 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
76 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
77 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
78 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
79 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
80 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
81 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
82 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
83 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
84 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
85 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
86 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
87 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
88 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
89 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
90 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
91 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
92 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
93 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
94 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
95 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
96 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
97 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
98 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
99 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
100 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
101 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
102 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
103 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
104 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
105 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
106 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
107 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
108 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
109 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
110 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
113 __constant u32 te1[256] =
115 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
116 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
117 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
118 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
119 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
120 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
121 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
122 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
123 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
124 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
125 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
126 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
127 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
128 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
129 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
130 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
131 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
132 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
133 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
134 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
135 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
136 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
137 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
138 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
139 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
140 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
141 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
142 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
143 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
144 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
145 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
146 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
147 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
148 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
149 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
150 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
151 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
152 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
153 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
154 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
155 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
156 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
157 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
158 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
159 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
160 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
161 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
162 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
163 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
164 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
165 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
166 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
167 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
168 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
169 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
170 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
171 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
172 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
173 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
174 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
175 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
176 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
177 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
178 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
181 __constant u32 te2[256] =
183 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
184 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
185 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
186 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
187 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
188 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
189 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
190 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
191 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
192 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
193 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
194 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
195 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
196 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
197 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
198 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
199 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
200 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
201 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
202 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
203 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
204 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
205 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
206 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
207 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
208 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
209 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
210 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
211 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
212 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
213 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
214 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
215 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
216 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
217 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
218 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
219 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
220 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
221 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
222 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
223 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
224 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
225 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
226 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
227 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
228 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
229 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
230 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
231 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
232 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
233 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
234 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
235 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
236 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
237 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
238 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
239 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
240 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
241 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
242 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
243 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
244 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
245 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
246 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
249 __constant u32 te3[256] =
251 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
252 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
253 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
254 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
255 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
256 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
257 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
258 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
259 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
260 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
261 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
262 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
263 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
264 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
265 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
266 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
267 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
268 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
269 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
270 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
271 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
272 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
273 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
274 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
275 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
276 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
277 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
278 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
279 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
280 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
281 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
282 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
283 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
284 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
285 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
286 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
287 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
288 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
289 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
290 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
291 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
292 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
293 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
294 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
295 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
296 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
297 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
298 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
299 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
300 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
301 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
302 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
303 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
304 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
305 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
306 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
307 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
308 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
309 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
310 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
311 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
312 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
313 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
314 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
317 __constant u32 te4[256] =
319 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
320 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
321 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
322 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
323 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
324 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
325 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
326 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
327 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
328 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
329 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
330 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
331 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
332 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
333 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
334 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
335 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
336 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
337 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
338 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
339 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
340 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
341 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
342 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
343 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
344 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
345 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
346 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
347 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
348 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
349 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
350 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
351 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
352 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
353 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
354 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
355 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
356 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
357 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
358 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
359 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
360 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
361 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
362 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
363 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
364 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
365 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
366 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
367 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
368 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
369 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
370 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
371 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
372 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
373 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
374 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
375 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
376 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
377 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
378 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
379 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
380 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
381 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
382 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
385 __constant u32 td0[256] =
387 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
388 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
389 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
390 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
391 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
392 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
393 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
394 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
395 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
396 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
397 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
398 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
399 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
400 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
401 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
402 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
403 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
404 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
405 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
406 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
407 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
408 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
409 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
410 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
411 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
412 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
413 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
414 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
415 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
416 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
417 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
418 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
419 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
420 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
421 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
422 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
423 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
424 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
425 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
426 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
427 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
428 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
429 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
430 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
431 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
432 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
433 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
434 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
435 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
436 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
437 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
438 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
439 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
440 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
441 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
442 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
443 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
444 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
445 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
446 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
447 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
448 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
449 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
450 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
453 __constant u32 td1[256] =
455 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
456 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
457 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
458 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
459 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
460 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
461 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
462 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
463 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
464 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
465 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
466 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
467 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
468 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
469 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
470 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
471 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
472 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
473 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
474 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
475 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
476 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
477 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
478 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
479 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
480 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
481 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
482 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
483 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
484 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
485 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
486 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
487 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
488 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
489 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
490 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
491 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
492 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
493 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
494 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
495 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
496 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
497 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
498 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
499 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
500 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
501 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
502 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
503 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
504 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
505 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
506 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
507 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
508 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
509 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
510 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
511 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
512 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
513 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
514 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
515 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
516 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
517 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
518 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
521 __constant u32 td2[256] =
523 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
524 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
525 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
526 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
527 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
528 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
529 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
530 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
531 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
532 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
533 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
534 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
535 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
536 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
537 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
538 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
539 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
540 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
541 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
542 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
543 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
544 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
545 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
546 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
547 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
548 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
549 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
550 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
551 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
552 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
553 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
554 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
555 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
556 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
557 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
558 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
559 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
560 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
561 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
562 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
563 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
564 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
565 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
566 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
567 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
568 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
569 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
570 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
571 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
572 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
573 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
574 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
575 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
576 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
577 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
578 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
579 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
580 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
581 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
582 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
583 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
584 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
585 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
586 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
589 __constant u32 td3[256] =
591 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
592 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
593 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
594 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
595 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
596 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
597 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
598 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
599 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
600 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
601 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
602 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
603 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
604 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
605 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
606 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
607 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
608 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
609 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
610 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
611 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
612 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
613 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
614 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
615 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
616 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
617 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
618 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
619 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
620 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
621 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
622 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
623 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
624 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
625 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
626 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
627 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
628 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
629 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
630 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
631 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
632 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
633 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
634 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
635 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
636 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
637 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
638 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
639 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
640 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
641 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
642 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
643 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
644 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
645 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
646 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
647 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
648 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
649 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
650 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
651 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
652 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
653 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
654 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
657 __constant u32 td4[256] =
659 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
660 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
661 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
662 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
663 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
664 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
665 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
666 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
667 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
668 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
669 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
670 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
671 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
672 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
673 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
674 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
675 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
676 0x86868686, 0x68686868, 0x98989898, 0x16161616,
677 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
678 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
679 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
680 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
681 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
682 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
683 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
684 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
685 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
686 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
687 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
688 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
689 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
690 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
691 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
692 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
693 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
694 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
695 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
696 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
697 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
698 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
699 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
700 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
701 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
702 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
703 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
704 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
705 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
706 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
707 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
708 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
709 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
710 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
711 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
712 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
713 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
714 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
715 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
716 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
717 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
718 0x83838383, 0x53535353, 0x99999999, 0x61616161,
719 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
720 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
721 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
722 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
725 __constant u32 rcon[] =
727 0x01000000, 0x02000000, 0x04000000, 0x08000000,
728 0x10000000, 0x20000000, 0x40000000, 0x80000000,
729 0x1b000000, 0x36000000,
732 static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
739 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
741 u32 temp = rek[j + 3];
743 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
744 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
745 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
746 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
748 rek[j + 4] = rek[j + 0]
752 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
753 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
754 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
758 static void AES128_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
760 for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
764 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
765 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
766 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
767 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
770 for (u32 i = 1, j = 4; i < 10; i += 1, j += 4)
773 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
774 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
775 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
776 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
779 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
780 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
781 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
782 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
785 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
786 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
787 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
788 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
791 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
792 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
793 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
794 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
798 static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
800 u32 s0 = in[0] ^ rdk[0];
801 u32 s1 = in[1] ^ rdk[1];
802 u32 s2 = in[2] ^ rdk[2];
803 u32 s3 = in[3] ^ rdk[3];
810 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
811 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
812 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
813 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
814 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
815 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
816 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
817 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
818 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
819 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
820 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
821 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
822 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
823 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
824 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
825 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
826 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
827 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
828 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
829 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
830 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
831 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
832 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
833 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
834 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
835 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
836 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
837 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
838 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
839 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
840 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
841 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
842 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
843 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
844 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
845 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
847 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
848 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
849 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
850 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
853 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
854 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
855 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
856 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
859 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
860 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
861 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
862 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
865 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
866 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
867 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
868 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
872 static void sha1_transform (const u32x w[16], u32x digest[5])
900 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
901 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
902 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
903 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
904 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
905 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
906 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
907 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
908 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
909 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
910 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
911 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
912 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
913 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
914 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
915 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
916 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
917 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
918 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
919 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
924 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
925 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
926 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
927 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
928 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
929 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
930 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
931 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
932 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
933 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
934 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
935 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
936 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
937 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
938 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
939 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
940 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
941 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
942 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
943 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
948 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
949 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
950 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
951 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
952 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
953 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
954 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
955 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
956 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
957 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
958 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
959 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
960 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
961 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
962 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
963 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
964 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
965 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
966 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
967 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
972 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
973 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
974 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
975 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
976 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
977 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
978 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
979 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
980 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
981 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
982 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
983 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
984 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
985 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
986 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
987 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
988 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
989 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
990 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
991 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
1000 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global rar3_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pbkdf2_sha1_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1006 const u32 gid = get_global_id (0);
1008 if (gid >= gid_max) return;
1010 tmps[gid].dgst[0][0] = SHA1M_A;
1011 tmps[gid].dgst[0][1] = SHA1M_B;
1012 tmps[gid].dgst[0][2] = SHA1M_C;
1013 tmps[gid].dgst[0][3] = SHA1M_D;
1014 tmps[gid].dgst[0][4] = SHA1M_E;
1017 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global rar3_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pbkdf2_sha1_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1019 const u32 gid = get_global_id (0);
1021 if (gid >= gid_max) return;
1025 pw_buf[0] = pws[gid].i[0];
1026 pw_buf[1] = pws[gid].i[1];
1027 pw_buf[2] = pws[gid].i[2];
1028 pw_buf[3] = pws[gid].i[3];
1029 pw_buf[4] = pws[gid].i[4];
1031 const u32 pw_len = pws[gid].pw_len;
1035 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1036 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1038 const u32 salt_len = 8;
1040 // this is large enough to hold all possible w[] arrays for 64 iterations
1042 #define LARGEBLOCK_ELEMS ((40 + 8 + 3) * 16)
1044 u32 largeblock[LARGEBLOCK_ELEMS];
1046 for (u32 i = 0; i < LARGEBLOCK_ELEMS; i++) largeblock[i] = 0;
1048 for (u32 i = 0, p = 0; i < 64; i++)
1050 for (u32 j = 0; j < pw_len; j++, p += 2)
1052 PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j));
1055 for (u32 j = 0; j < salt_len; j++, p += 1)
1057 PUTCHAR_BE (largeblock, p, GETCHAR (salt_buf, j));
1060 PUTCHAR_BE (largeblock, p + 2, (loop_pos >> 16) & 0xff);
1065 const u32 p3 = (pw_len * 2) + salt_len + 3;
1067 const u32 init_pos = loop_pos / (ROUNDS / 16);
1071 dgst[0] = tmps[gid].dgst[init_pos][0];
1072 dgst[1] = tmps[gid].dgst[init_pos][1];
1073 dgst[2] = tmps[gid].dgst[init_pos][2];
1074 dgst[3] = tmps[gid].dgst[init_pos][3];
1075 dgst[4] = tmps[gid].dgst[init_pos][4];
1077 u32 iter = loop_pos;
1079 for (u32 i = 0; i < 256; i += 4)
1081 for (u32 j = 0; j < 64; j++)
1083 const u32 p = ((j + 1) * p3) - 2;
1085 PUTCHAR_BE (largeblock, p, iter >> 8);
1088 for (u32 k = 0; k < 4; k++)
1090 for (u32 j = 0; j < 64; j++)
1092 const u32 p = ((j + 1) * p3) - 3;
1094 PUTCHAR_BE (largeblock, p, iter >> 0);
1099 for (u32 j = 0; j < p3; j++)
1101 const u32 j16 = j * 16;
1103 sha1_transform (&largeblock[j16], dgst);
1108 tmps[gid].dgst[init_pos + 1][0] = dgst[0];
1109 tmps[gid].dgst[init_pos + 1][1] = dgst[1];
1110 tmps[gid].dgst[init_pos + 1][2] = dgst[2];
1111 tmps[gid].dgst[init_pos + 1][3] = dgst[3];
1112 tmps[gid].dgst[init_pos + 1][4] = dgst[4];
1115 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global rar3_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pbkdf2_sha1_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1121 const u32 gid = get_global_id (0);
1122 const u32 lid = get_local_id (0);
1128 const u32 lid4 = lid * 4;
1130 __local u32 s_td0[256];
1131 __local u32 s_td1[256];
1132 __local u32 s_td2[256];
1133 __local u32 s_td3[256];
1134 __local u32 s_td4[256];
1136 __local u32 s_te0[256];
1137 __local u32 s_te1[256];
1138 __local u32 s_te2[256];
1139 __local u32 s_te3[256];
1140 __local u32 s_te4[256];
1142 s_td0[lid4 + 0] = td0[lid4 + 0];
1143 s_td0[lid4 + 1] = td0[lid4 + 1];
1144 s_td0[lid4 + 2] = td0[lid4 + 2];
1145 s_td0[lid4 + 3] = td0[lid4 + 3];
1147 s_td1[lid4 + 0] = td1[lid4 + 0];
1148 s_td1[lid4 + 1] = td1[lid4 + 1];
1149 s_td1[lid4 + 2] = td1[lid4 + 2];
1150 s_td1[lid4 + 3] = td1[lid4 + 3];
1152 s_td2[lid4 + 0] = td2[lid4 + 0];
1153 s_td2[lid4 + 1] = td2[lid4 + 1];
1154 s_td2[lid4 + 2] = td2[lid4 + 2];
1155 s_td2[lid4 + 3] = td2[lid4 + 3];
1157 s_td3[lid4 + 0] = td3[lid4 + 0];
1158 s_td3[lid4 + 1] = td3[lid4 + 1];
1159 s_td3[lid4 + 2] = td3[lid4 + 2];
1160 s_td3[lid4 + 3] = td3[lid4 + 3];
1162 s_td4[lid4 + 0] = td4[lid4 + 0];
1163 s_td4[lid4 + 1] = td4[lid4 + 1];
1164 s_td4[lid4 + 2] = td4[lid4 + 2];
1165 s_td4[lid4 + 3] = td4[lid4 + 3];
1167 s_te0[lid4 + 0] = te0[lid4 + 0];
1168 s_te0[lid4 + 1] = te0[lid4 + 1];
1169 s_te0[lid4 + 2] = te0[lid4 + 2];
1170 s_te0[lid4 + 3] = te0[lid4 + 3];
1172 s_te1[lid4 + 0] = te1[lid4 + 0];
1173 s_te1[lid4 + 1] = te1[lid4 + 1];
1174 s_te1[lid4 + 2] = te1[lid4 + 2];
1175 s_te1[lid4 + 3] = te1[lid4 + 3];
1177 s_te2[lid4 + 0] = te2[lid4 + 0];
1178 s_te2[lid4 + 1] = te2[lid4 + 1];
1179 s_te2[lid4 + 2] = te2[lid4 + 2];
1180 s_te2[lid4 + 3] = te2[lid4 + 3];
1182 s_te3[lid4 + 0] = te3[lid4 + 0];
1183 s_te3[lid4 + 1] = te3[lid4 + 1];
1184 s_te3[lid4 + 2] = te3[lid4 + 2];
1185 s_te3[lid4 + 3] = te3[lid4 + 3];
1187 s_te4[lid4 + 0] = te4[lid4 + 0];
1188 s_te4[lid4 + 1] = te4[lid4 + 1];
1189 s_te4[lid4 + 2] = te4[lid4 + 2];
1190 s_te4[lid4 + 3] = te4[lid4 + 3];
1192 barrier (CLK_LOCAL_MEM_FENCE);
1194 if (gid >= gid_max) return;
1196 const u32 pw_len = pws[gid].pw_len;
1198 const u32 salt_len = 8;
1200 const u32 p3 = (pw_len * 2) + salt_len + 3;
1204 w_buf[ 0] = 0x80000000;
1219 w_buf[15] = (p3 * ROUNDS) * 8;
1223 dgst[0] = tmps[gid].dgst[16][0];
1224 dgst[1] = tmps[gid].dgst[16][1];
1225 dgst[2] = tmps[gid].dgst[16][2];
1226 dgst[3] = tmps[gid].dgst[16][3];
1227 dgst[4] = tmps[gid].dgst[16][4];
1229 sha1_transform (w_buf, dgst);
1235 data[0] = salt_bufs[salt_pos].salt_buf[2];
1236 data[1] = salt_bufs[salt_pos].salt_buf[3];
1237 data[2] = salt_bufs[salt_pos].salt_buf[4];
1238 data[3] = salt_bufs[salt_pos].salt_buf[5];
1242 ukeyx[0] = swap_workaround (dgst[0]);
1243 ukeyx[1] = swap_workaround (dgst[1]);
1244 ukeyx[2] = swap_workaround (dgst[2]);
1245 ukeyx[3] = swap_workaround (dgst[3]);
1247 AES128_ExpandKey (ukeyx, rk, s_te0, s_te1, s_te2, s_te3, s_te4);
1249 AES128_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1253 AES128_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4);
1262 for (int i = 0; i < 16; i++)
1266 pw_buf[0] = pws[gid].i[0];
1267 pw_buf[1] = pws[gid].i[1];
1268 pw_buf[2] = pws[gid].i[2];
1269 pw_buf[3] = pws[gid].i[3];
1270 pw_buf[4] = pws[gid].i[4];
1272 const u32 pw_len = pws[gid].pw_len;
1276 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1277 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1279 const u32 salt_len = 8;
1281 const u32 p3 = (pw_len * 2) + salt_len + 3;
1304 for (u32 j = 0; j < pw_len; j++, p += 2)
1306 PUTCHAR_BE (w, p, GETCHAR (pw_buf, j));
1309 for (u32 j = 0; j < salt_len; j++, p += 1)
1311 PUTCHAR_BE (w, p, GETCHAR (salt_buf, j));
1314 const u32 iter_pos = i * (ROUNDS / 16);
1316 PUTCHAR_BE (w, p + 0, (iter_pos >> 0) & 0xff);
1317 PUTCHAR_BE (w, p + 1, (iter_pos >> 8) & 0xff);
1318 PUTCHAR_BE (w, p + 2, (iter_pos >> 16) & 0xff);
1320 PUTCHAR_BE (w, p3, 0x80);
1322 w[15] = ((iter_pos + 1) * p3) * 8;
1326 dgst[0] = tmps[gid].dgst[i][0];
1327 dgst[1] = tmps[gid].dgst[i][1];
1328 dgst[2] = tmps[gid].dgst[i][2];
1329 dgst[3] = tmps[gid].dgst[i][3];
1330 dgst[4] = tmps[gid].dgst[i][4];
1332 sha1_transform (w, dgst);
1334 PUTCHAR (iv, i, dgst[4] & 0xff);
1337 out[0] ^= swap_workaround (iv[0]);
1338 out[1] ^= swap_workaround (iv[1]);
1339 out[2] ^= swap_workaround (iv[2]);
1340 out[3] ^= swap_workaround (iv[3]);
1342 const u32x r0 = out[0];
1343 const u32x r1 = out[1];
1349 #include VECT_COMPARE_M