2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
18 #include "types_ocl.c"
22 #define COMPARE_M "check_multi_vect1_comp4.c"
25 #define ROUNDS 0x40000
27 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
28 #define GETCHAR(a,p) ((u8 *)(a))[(p)]
30 #define PUTCHAR_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
31 #define GETCHAR_BE(a,p) ((u8 *)(a))[(p) ^ 3]
33 __constant u32 te0[256] =
35 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
36 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
37 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
38 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
39 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
40 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
41 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
42 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
43 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
44 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
45 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
46 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
47 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
48 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
49 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
50 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
51 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
52 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
53 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
54 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
55 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
56 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
57 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
58 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
59 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
60 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
61 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
62 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
63 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
64 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
65 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
66 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
67 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
68 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
69 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
70 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
71 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
72 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
73 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
74 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
75 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
76 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
77 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
78 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
79 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
80 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
81 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
82 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
83 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
84 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
85 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
86 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
87 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
88 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
89 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
90 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
91 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
92 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
93 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
94 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
95 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
96 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
97 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
98 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
101 __constant u32 te1[256] =
103 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
104 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
105 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
106 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
107 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
108 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
109 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
110 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
111 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
112 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
113 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
114 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
115 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
116 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
117 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
118 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
119 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
120 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
121 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
122 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
123 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
124 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
125 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
126 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
127 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
128 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
129 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
130 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
131 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
132 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
133 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
134 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
135 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
136 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
137 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
138 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
139 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
140 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
141 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
142 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
143 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
144 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
145 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
146 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
147 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
148 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
149 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
150 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
151 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
152 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
153 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
154 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
155 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
156 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
157 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
158 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
159 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
160 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
161 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
162 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
163 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
164 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
165 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
166 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
169 __constant u32 te2[256] =
171 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
172 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
173 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
174 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
175 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
176 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
177 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
178 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
179 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
180 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
181 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
182 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
183 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
184 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
185 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
186 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
187 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
188 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
189 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
190 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
191 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
192 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
193 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
194 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
195 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
196 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
197 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
198 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
199 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
200 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
201 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
202 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
203 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
204 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
205 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
206 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
207 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
208 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
209 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
210 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
211 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
212 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
213 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
214 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
215 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
216 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
217 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
218 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
219 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
220 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
221 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
222 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
223 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
224 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
225 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
226 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
227 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
228 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
229 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
230 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
231 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
232 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
233 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
234 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
237 __constant u32 te3[256] =
239 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
240 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
241 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
242 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
243 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
244 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
245 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
246 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
247 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
248 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
249 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
250 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
251 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
252 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
253 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
254 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
255 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
256 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
257 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
258 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
259 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
260 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
261 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
262 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
263 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
264 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
265 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
266 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
267 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
268 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
269 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
270 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
271 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
272 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
273 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
274 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
275 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
276 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
277 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
278 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
279 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
280 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
281 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
282 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
283 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
284 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
285 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
286 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
287 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
288 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
289 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
290 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
291 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
292 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
293 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
294 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
295 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
296 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
297 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
298 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
299 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
300 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
301 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
302 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
305 __constant u32 te4[256] =
307 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
308 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
309 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
310 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
311 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
312 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
313 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
314 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
315 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
316 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
317 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
318 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
319 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
320 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
321 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
322 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
323 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
324 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
325 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
326 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
327 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
328 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
329 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
330 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
331 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
332 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
333 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
334 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
335 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
336 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
337 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
338 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
339 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
340 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
341 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
342 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
343 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
344 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
345 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
346 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
347 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
348 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
349 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
350 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
351 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
352 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
353 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
354 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
355 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
356 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
357 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
358 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
359 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
360 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
361 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
362 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
363 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
364 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
365 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
366 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
367 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
368 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
369 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
370 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
373 __constant u32 td0[256] =
375 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
376 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
377 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
378 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
379 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
380 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
381 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
382 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
383 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
384 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
385 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
386 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
387 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
388 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
389 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
390 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
391 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
392 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
393 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
394 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
395 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
396 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
397 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
398 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
399 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
400 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
401 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
402 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
403 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
404 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
405 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
406 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
407 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
408 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
409 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
410 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
411 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
412 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
413 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
414 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
415 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
416 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
417 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
418 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
419 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
420 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
421 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
422 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
423 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
424 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
425 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
426 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
427 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
428 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
429 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
430 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
431 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
432 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
433 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
434 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
435 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
436 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
437 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
438 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
441 __constant u32 td1[256] =
443 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
444 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
445 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
446 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
447 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
448 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
449 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
450 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
451 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
452 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
453 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
454 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
455 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
456 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
457 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
458 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
459 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
460 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
461 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
462 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
463 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
464 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
465 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
466 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
467 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
468 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
469 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
470 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
471 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
472 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
473 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
474 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
475 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
476 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
477 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
478 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
479 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
480 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
481 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
482 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
483 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
484 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
485 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
486 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
487 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
488 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
489 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
490 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
491 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
492 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
493 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
494 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
495 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
496 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
497 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
498 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
499 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
500 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
501 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
502 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
503 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
504 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
505 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
506 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
509 __constant u32 td2[256] =
511 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
512 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
513 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
514 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
515 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
516 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
517 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
518 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
519 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
520 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
521 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
522 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
523 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
524 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
525 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
526 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
527 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
528 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
529 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
530 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
531 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
532 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
533 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
534 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
535 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
536 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
537 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
538 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
539 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
540 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
541 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
542 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
543 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
544 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
545 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
546 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
547 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
548 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
549 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
550 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
551 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
552 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
553 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
554 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
555 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
556 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
557 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
558 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
559 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
560 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
561 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
562 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
563 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
564 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
565 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
566 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
567 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
568 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
569 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
570 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
571 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
572 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
573 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
574 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
577 __constant u32 td3[256] =
579 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
580 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
581 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
582 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
583 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
584 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
585 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
586 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
587 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
588 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
589 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
590 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
591 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
592 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
593 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
594 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
595 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
596 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
597 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
598 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
599 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
600 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
601 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
602 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
603 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
604 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
605 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
606 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
607 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
608 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
609 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
610 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
611 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
612 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
613 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
614 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
615 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
616 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
617 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
618 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
619 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
620 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
621 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
622 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
623 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
624 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
625 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
626 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
627 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
628 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
629 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
630 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
631 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
632 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
633 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
634 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
635 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
636 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
637 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
638 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
639 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
640 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
641 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
642 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
645 __constant u32 td4[256] =
647 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
648 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
649 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
650 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
651 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
652 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
653 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
654 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
655 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
656 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
657 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
658 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
659 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
660 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
661 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
662 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
663 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
664 0x86868686, 0x68686868, 0x98989898, 0x16161616,
665 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
666 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
667 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
668 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
669 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
670 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
671 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
672 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
673 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
674 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
675 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
676 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
677 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
678 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
679 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
680 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
681 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
682 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
683 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
684 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
685 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
686 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
687 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
688 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
689 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
690 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
691 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
692 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
693 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
694 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
695 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
696 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
697 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
698 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
699 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
700 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
701 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
702 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
703 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
704 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
705 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
706 0x83838383, 0x53535353, 0x99999999, 0x61616161,
707 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
708 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
709 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
710 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
713 __constant u32 rcon[] =
715 0x01000000, 0x02000000, 0x04000000, 0x08000000,
716 0x10000000, 0x20000000, 0x40000000, 0x80000000,
717 0x1b000000, 0x36000000,
720 static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
727 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
729 u32 temp = rek[j + 3];
731 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
732 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
733 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
734 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
736 rek[j + 4] = rek[j + 0]
740 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
741 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
742 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
746 static void AES128_InvertKey (u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256], __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
748 for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
752 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
753 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
754 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
755 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
758 for (u32 i = 1, j = 4; i < 10; i += 1, j += 4)
761 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
762 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
763 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
764 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
767 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
768 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
769 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
770 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
773 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
774 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
775 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
776 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
779 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
780 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
781 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
782 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
786 static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 s_td0[256], __local u32 s_td1[256], __local u32 s_td2[256], __local u32 s_td3[256], __local u32 s_td4[256])
788 u32 s0 = in[0] ^ rdk[0];
789 u32 s1 = in[1] ^ rdk[1];
790 u32 s2 = in[2] ^ rdk[2];
791 u32 s3 = in[3] ^ rdk[3];
798 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
799 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
800 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
801 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
802 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
803 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
804 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
805 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
806 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
807 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
808 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
809 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
810 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
811 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
812 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
813 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
814 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
815 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
816 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
817 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
818 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
819 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
820 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
821 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
822 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
823 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
824 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
825 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
826 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
827 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
828 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
829 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
830 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
831 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
832 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
833 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
835 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
836 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
837 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
838 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
841 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
842 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
843 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
844 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
847 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
848 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
849 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
850 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
853 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
854 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
855 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
856 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
860 static void sha1_transform (const u32 w[16], u32 digest[5])
888 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
889 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
890 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
891 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
892 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
893 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
894 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
895 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
896 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
897 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
898 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
899 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
900 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
901 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
902 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
903 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
904 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
905 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
906 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
907 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
912 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
913 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
914 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
915 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
916 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
917 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
918 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
919 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
920 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
921 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
922 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
923 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
924 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
925 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
926 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
927 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
928 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
929 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
930 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
931 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
936 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
937 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
938 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
939 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
940 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
941 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
942 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
943 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
944 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
945 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
946 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
947 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
948 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
949 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
950 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
951 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
952 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
953 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
954 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
955 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
960 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
961 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
962 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
963 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
964 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
965 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
966 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
967 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
968 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
969 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
970 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
971 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
972 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
973 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
974 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
975 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
976 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
977 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
978 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
979 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
988 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global rar3_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pbkdf2_sha1_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
994 const u32 gid = get_global_id (0);
996 if (gid >= gid_max) return;
998 tmps[gid].dgst[0][0] = SHA1M_A;
999 tmps[gid].dgst[0][1] = SHA1M_B;
1000 tmps[gid].dgst[0][2] = SHA1M_C;
1001 tmps[gid].dgst[0][3] = SHA1M_D;
1002 tmps[gid].dgst[0][4] = SHA1M_E;
1005 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global rar3_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pbkdf2_sha1_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1007 const u32 gid = get_global_id (0);
1009 if (gid >= gid_max) return;
1013 pw_buf[0] = pws[gid].i[0];
1014 pw_buf[1] = pws[gid].i[1];
1015 pw_buf[2] = pws[gid].i[2];
1016 pw_buf[3] = pws[gid].i[3];
1017 pw_buf[4] = pws[gid].i[4];
1019 const u32 pw_len = pws[gid].pw_len;
1023 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1024 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1026 const u32 salt_len = 8;
1028 // this is large enough to hold all possible w[] arrays for 64 iterations
1030 #define LARGEBLOCK_ELEMS ((40 + 8 + 3) * 16)
1032 u32 largeblock[LARGEBLOCK_ELEMS];
1034 for (u32 i = 0; i < LARGEBLOCK_ELEMS; i++) largeblock[i] = 0;
1036 for (u32 i = 0, p = 0; i < 64; i++)
1038 for (u32 j = 0; j < pw_len; j++, p += 2)
1040 PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j));
1043 for (u32 j = 0; j < salt_len; j++, p += 1)
1045 PUTCHAR_BE (largeblock, p, GETCHAR (salt_buf, j));
1048 PUTCHAR_BE (largeblock, p + 2, (loop_pos >> 16) & 0xff);
1053 const u32 p3 = (pw_len * 2) + salt_len + 3;
1055 const u32 init_pos = loop_pos / (ROUNDS / 16);
1059 dgst[0] = tmps[gid].dgst[init_pos][0];
1060 dgst[1] = tmps[gid].dgst[init_pos][1];
1061 dgst[2] = tmps[gid].dgst[init_pos][2];
1062 dgst[3] = tmps[gid].dgst[init_pos][3];
1063 dgst[4] = tmps[gid].dgst[init_pos][4];
1065 u32 iter = loop_pos;
1067 for (u32 i = 0; i < 256; i += 4)
1069 for (u32 j = 0; j < 64; j++)
1071 const u32 p = ((j + 1) * p3) - 2;
1073 PUTCHAR_BE (largeblock, p, iter >> 8);
1076 for (u32 k = 0; k < 4; k++)
1078 for (u32 j = 0; j < 64; j++)
1080 const u32 p = ((j + 1) * p3) - 3;
1082 PUTCHAR_BE (largeblock, p, iter >> 0);
1087 for (u32 j = 0; j < p3; j++)
1089 const u32 j16 = j * 16;
1091 sha1_transform (&largeblock[j16], dgst);
1096 tmps[gid].dgst[init_pos + 1][0] = dgst[0];
1097 tmps[gid].dgst[init_pos + 1][1] = dgst[1];
1098 tmps[gid].dgst[init_pos + 1][2] = dgst[2];
1099 tmps[gid].dgst[init_pos + 1][3] = dgst[3];
1100 tmps[gid].dgst[init_pos + 1][4] = dgst[4];
1103 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global rar3_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pbkdf2_sha1_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1109 const u32 gid = get_global_id (0);
1110 const u32 lid = get_local_id (0);
1116 const u32 lid4 = lid * 4;
1118 __local u32 s_td0[256];
1119 __local u32 s_td1[256];
1120 __local u32 s_td2[256];
1121 __local u32 s_td3[256];
1122 __local u32 s_td4[256];
1124 __local u32 s_te0[256];
1125 __local u32 s_te1[256];
1126 __local u32 s_te2[256];
1127 __local u32 s_te3[256];
1128 __local u32 s_te4[256];
1130 s_td0[lid4 + 0] = td0[lid4 + 0];
1131 s_td0[lid4 + 1] = td0[lid4 + 1];
1132 s_td0[lid4 + 2] = td0[lid4 + 2];
1133 s_td0[lid4 + 3] = td0[lid4 + 3];
1135 s_td1[lid4 + 0] = td1[lid4 + 0];
1136 s_td1[lid4 + 1] = td1[lid4 + 1];
1137 s_td1[lid4 + 2] = td1[lid4 + 2];
1138 s_td1[lid4 + 3] = td1[lid4 + 3];
1140 s_td2[lid4 + 0] = td2[lid4 + 0];
1141 s_td2[lid4 + 1] = td2[lid4 + 1];
1142 s_td2[lid4 + 2] = td2[lid4 + 2];
1143 s_td2[lid4 + 3] = td2[lid4 + 3];
1145 s_td3[lid4 + 0] = td3[lid4 + 0];
1146 s_td3[lid4 + 1] = td3[lid4 + 1];
1147 s_td3[lid4 + 2] = td3[lid4 + 2];
1148 s_td3[lid4 + 3] = td3[lid4 + 3];
1150 s_td4[lid4 + 0] = td4[lid4 + 0];
1151 s_td4[lid4 + 1] = td4[lid4 + 1];
1152 s_td4[lid4 + 2] = td4[lid4 + 2];
1153 s_td4[lid4 + 3] = td4[lid4 + 3];
1155 s_te0[lid4 + 0] = te0[lid4 + 0];
1156 s_te0[lid4 + 1] = te0[lid4 + 1];
1157 s_te0[lid4 + 2] = te0[lid4 + 2];
1158 s_te0[lid4 + 3] = te0[lid4 + 3];
1160 s_te1[lid4 + 0] = te1[lid4 + 0];
1161 s_te1[lid4 + 1] = te1[lid4 + 1];
1162 s_te1[lid4 + 2] = te1[lid4 + 2];
1163 s_te1[lid4 + 3] = te1[lid4 + 3];
1165 s_te2[lid4 + 0] = te2[lid4 + 0];
1166 s_te2[lid4 + 1] = te2[lid4 + 1];
1167 s_te2[lid4 + 2] = te2[lid4 + 2];
1168 s_te2[lid4 + 3] = te2[lid4 + 3];
1170 s_te3[lid4 + 0] = te3[lid4 + 0];
1171 s_te3[lid4 + 1] = te3[lid4 + 1];
1172 s_te3[lid4 + 2] = te3[lid4 + 2];
1173 s_te3[lid4 + 3] = te3[lid4 + 3];
1175 s_te4[lid4 + 0] = te4[lid4 + 0];
1176 s_te4[lid4 + 1] = te4[lid4 + 1];
1177 s_te4[lid4 + 2] = te4[lid4 + 2];
1178 s_te4[lid4 + 3] = te4[lid4 + 3];
1180 barrier (CLK_LOCAL_MEM_FENCE);
1182 if (gid >= gid_max) return;
1184 const u32 pw_len = pws[gid].pw_len;
1186 const u32 salt_len = 8;
1188 const u32 p3 = (pw_len * 2) + salt_len + 3;
1192 w_buf[ 0] = 0x80000000;
1207 w_buf[15] = (p3 * ROUNDS) * 8;
1211 dgst[0] = tmps[gid].dgst[16][0];
1212 dgst[1] = tmps[gid].dgst[16][1];
1213 dgst[2] = tmps[gid].dgst[16][2];
1214 dgst[3] = tmps[gid].dgst[16][3];
1215 dgst[4] = tmps[gid].dgst[16][4];
1217 sha1_transform (w_buf, dgst);
1223 data[0] = salt_bufs[salt_pos].salt_buf[2];
1224 data[1] = salt_bufs[salt_pos].salt_buf[3];
1225 data[2] = salt_bufs[salt_pos].salt_buf[4];
1226 data[3] = salt_bufs[salt_pos].salt_buf[5];
1230 ukeyx[0] = swap_workaround (dgst[0]);
1231 ukeyx[1] = swap_workaround (dgst[1]);
1232 ukeyx[2] = swap_workaround (dgst[2]);
1233 ukeyx[3] = swap_workaround (dgst[3]);
1235 AES128_ExpandKey (ukeyx, rk, s_te0, s_te1, s_te2, s_te3, s_te4);
1237 AES128_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1241 AES128_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4);
1250 for (int i = 0; i < 16; i++)
1254 pw_buf[0] = pws[gid].i[0];
1255 pw_buf[1] = pws[gid].i[1];
1256 pw_buf[2] = pws[gid].i[2];
1257 pw_buf[3] = pws[gid].i[3];
1258 pw_buf[4] = pws[gid].i[4];
1260 const u32 pw_len = pws[gid].pw_len;
1264 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1265 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1267 const u32 salt_len = 8;
1269 const u32 p3 = (pw_len * 2) + salt_len + 3;
1292 for (u32 j = 0; j < pw_len; j++, p += 2)
1294 PUTCHAR_BE (w, p, GETCHAR (pw_buf, j));
1297 for (u32 j = 0; j < salt_len; j++, p += 1)
1299 PUTCHAR_BE (w, p, GETCHAR (salt_buf, j));
1302 const u32 iter_pos = i * (ROUNDS / 16);
1304 PUTCHAR_BE (w, p + 0, (iter_pos >> 0) & 0xff);
1305 PUTCHAR_BE (w, p + 1, (iter_pos >> 8) & 0xff);
1306 PUTCHAR_BE (w, p + 2, (iter_pos >> 16) & 0xff);
1308 PUTCHAR_BE (w, p3, 0x80);
1310 w[15] = ((iter_pos + 1) * p3) * 8;
1314 dgst[0] = tmps[gid].dgst[i][0];
1315 dgst[1] = tmps[gid].dgst[i][1];
1316 dgst[2] = tmps[gid].dgst[i][2];
1317 dgst[3] = tmps[gid].dgst[i][3];
1318 dgst[4] = tmps[gid].dgst[i][4];
1320 sha1_transform (w, dgst);
1322 PUTCHAR (iv, i, dgst[4] & 0xff);
1325 out[0] ^= swap_workaround (iv[0]);
1326 out[1] ^= swap_workaround (iv[1]);
1327 out[2] ^= swap_workaround (iv[2]);
1328 out[3] ^= swap_workaround (iv[3]);
1330 const u32 r0 = out[0];
1331 const u32 r1 = out[1];