2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
27 #include "common_nv.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
33 #define ROUNDS 0x40000
35 #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
36 #define GETCHAR(a,p) ((u8 *)(a))[(p)]
38 #define PUTCHAR_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
39 #define GETCHAR_BE(a,p) ((u8 *)(a))[(p) ^ 3]
41 __device__ __constant__ u32 te0[256] =
43 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
44 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
45 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
46 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
47 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
48 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
49 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
50 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
51 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
52 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
53 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
54 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
55 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
56 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
57 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
58 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
59 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
60 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
61 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
62 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
63 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
64 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
65 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
66 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
67 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
68 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
69 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
70 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
71 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
72 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
73 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
74 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
75 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
76 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
77 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
78 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
79 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
80 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
81 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
82 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
83 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
84 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
85 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
86 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
87 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
88 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
89 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
90 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
91 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
92 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
93 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
94 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
95 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
96 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
97 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
98 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
99 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
100 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
101 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
102 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
103 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
104 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
105 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
106 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
109 __device__ __constant__ u32 te1[256] =
111 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
112 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
113 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
114 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
115 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
116 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
117 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
118 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
119 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
120 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
121 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
122 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
123 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
124 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
125 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
126 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
127 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
128 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
129 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
130 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
131 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
132 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
133 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
134 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
135 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
136 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
137 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
138 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
139 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
140 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
141 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
142 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
143 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
144 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
145 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
146 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
147 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
148 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
149 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
150 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
151 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
152 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
153 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
154 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
155 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
156 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
157 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
158 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
159 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
160 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
161 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
162 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
163 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
164 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
165 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
166 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
167 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
168 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
169 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
170 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
171 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
172 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
173 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
174 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
177 __device__ __constant__ u32 te2[256] =
179 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
180 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
181 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
182 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
183 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
184 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
185 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
186 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
187 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
188 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
189 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
190 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
191 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
192 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
193 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
194 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
195 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
196 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
197 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
198 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
199 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
200 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
201 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
202 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
203 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
204 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
205 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
206 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
207 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
208 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
209 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
210 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
211 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
212 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
213 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
214 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
215 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
216 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
217 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
218 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
219 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
220 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
221 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
222 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
223 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
224 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
225 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
226 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
227 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
228 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
229 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
230 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
231 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
232 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
233 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
234 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
235 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
236 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
237 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
238 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
239 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
240 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
241 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
242 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
245 __device__ __constant__ u32 te3[256] =
247 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
248 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
249 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
250 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
251 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
252 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
253 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
254 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
255 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
256 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
257 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
258 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
259 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
260 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
261 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
262 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
263 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
264 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
265 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
266 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
267 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
268 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
269 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
270 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
271 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
272 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
273 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
274 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
275 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
276 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
277 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
278 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
279 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
280 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
281 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
282 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
283 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
284 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
285 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
286 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
287 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
288 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
289 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
290 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
291 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
292 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
293 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
294 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
295 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
296 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
297 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
298 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
299 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
300 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
301 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
302 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
303 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
304 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
305 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
306 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
307 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
308 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
309 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
310 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
313 __device__ __constant__ u32 te4[256] =
315 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
316 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
317 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b,
318 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676,
319 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d,
320 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0,
321 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf,
322 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0,
323 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626,
324 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc,
325 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1,
326 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515,
327 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3,
328 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a,
329 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2,
330 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575,
331 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a,
332 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0,
333 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3,
334 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484,
335 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed,
336 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b,
337 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939,
338 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf,
339 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb,
340 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585,
341 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f,
342 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8,
343 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f,
344 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5,
345 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121,
346 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2,
347 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec,
348 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717,
349 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d,
350 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373,
351 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc,
352 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888,
353 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414,
354 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb,
355 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a,
356 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c,
357 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262,
358 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979,
359 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d,
360 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9,
361 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea,
362 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808,
363 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e,
364 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6,
365 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f,
366 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a,
367 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666,
368 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e,
369 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9,
370 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e,
371 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111,
372 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494,
373 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9,
374 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf,
375 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d,
376 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868,
377 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f,
378 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
381 __device__ __constant__ u32 td0[256] =
383 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
384 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
385 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
386 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
387 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
388 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
389 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
390 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
391 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
392 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
393 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
394 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
395 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
396 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
397 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
398 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
399 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
400 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
401 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
402 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
403 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
404 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
405 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
406 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
407 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
408 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
409 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
410 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
411 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
412 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
413 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
414 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
415 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
416 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
417 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
418 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
419 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
420 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
421 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
422 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
423 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
424 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
425 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
426 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
427 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
428 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
429 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
430 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
431 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
432 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
433 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
434 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
435 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
436 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
437 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
438 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
439 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
440 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
441 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
442 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
443 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
444 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
445 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
446 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
449 __device__ __constant__ u32 td1[256] =
451 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
452 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
453 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
454 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
455 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
456 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
457 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
458 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
459 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
460 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
461 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
462 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
463 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
464 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
465 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
466 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
467 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
468 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
469 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
470 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
471 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
472 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
473 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
474 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
475 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
476 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
477 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
478 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
479 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
480 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
481 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
482 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
483 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
484 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
485 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
486 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
487 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
488 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
489 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
490 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
491 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
492 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
493 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
494 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
495 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
496 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
497 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
498 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
499 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
500 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
501 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
502 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
503 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
504 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
505 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
506 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
507 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
508 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
509 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737,
510 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
511 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340,
512 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
513 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
514 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
517 __device__ __constant__ u32 td2[256] =
519 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
520 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
521 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
522 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
523 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe,
524 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
525 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552,
526 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
527 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
528 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
529 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253,
530 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
531 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b,
532 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
533 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
534 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
535 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69,
536 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
537 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6,
538 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
539 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
540 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
541 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9,
542 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
543 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000,
544 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
545 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
546 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
547 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b,
548 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
549 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b,
550 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
551 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
552 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
553 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4,
554 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
555 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729,
556 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
557 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
558 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
559 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4,
560 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
561 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e,
562 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
563 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
564 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
565 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f,
566 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
567 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0,
568 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
569 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
570 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
571 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496,
572 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
573 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b,
574 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
575 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
576 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
577 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7,
578 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
579 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3,
580 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
581 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
582 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
585 __device__ __constant__ u32 td3[256] =
587 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
588 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
589 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
590 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
591 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d,
592 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
593 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95,
594 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
595 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
596 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
597 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562,
598 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
599 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752,
600 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
601 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
602 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
603 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e,
604 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
605 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4,
606 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
607 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
608 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
609 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767,
610 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
611 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000,
612 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
613 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
614 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
615 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b,
616 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
617 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12,
618 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
619 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
620 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
621 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8,
622 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
623 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7,
624 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
625 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
626 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
627 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698,
628 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
629 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254,
630 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
631 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
632 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
633 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883,
634 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
635 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629,
636 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
637 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
638 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
639 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4,
640 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
641 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb,
642 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
643 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
644 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
645 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73,
646 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
647 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2,
648 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
649 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
650 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
653 __device__ __constant__ u32 td4[256] =
655 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
656 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
657 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e,
658 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb,
659 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282,
660 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787,
661 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444,
662 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb,
663 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232,
664 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d,
665 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b,
666 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e,
667 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666,
668 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2,
669 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949,
670 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525,
671 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464,
672 0x86868686, 0x68686868, 0x98989898, 0x16161616,
673 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc,
674 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292,
675 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050,
676 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada,
677 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757,
678 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484,
679 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000,
680 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a,
681 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505,
682 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606,
683 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f,
684 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202,
685 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303,
686 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b,
687 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141,
688 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea,
689 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece,
690 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373,
691 0x96969696, 0xacacacac, 0x74747474, 0x22222222,
692 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585,
693 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8,
694 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e,
695 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171,
696 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989,
697 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e,
698 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b,
699 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b,
700 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020,
701 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe,
702 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4,
703 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333,
704 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131,
705 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959,
706 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f,
707 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9,
708 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d,
709 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f,
710 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef,
711 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d,
712 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0,
713 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c,
714 0x83838383, 0x53535353, 0x99999999, 0x61616161,
715 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e,
716 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626,
717 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363,
718 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d,
721 __device__ __constant__ u32 rcon[] =
723 0x01000000, 0x02000000, 0x04000000, 0x08000000,
724 0x10000000, 0x20000000, 0x40000000, 0x80000000,
725 0x1b000000, 0x36000000,
728 __device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
736 for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
738 u32 temp = rek[j + 3];
740 temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000)
741 ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000)
742 ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00)
743 ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff);
745 rek[j + 4] = rek[j + 0]
749 rek[j + 5] = rek[j + 1] ^ rek[j + 4];
750 rek[j + 6] = rek[j + 2] ^ rek[j + 5];
751 rek[j + 7] = rek[j + 3] ^ rek[j + 6];
755 __device__ static void AES128_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256])
757 for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
761 temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp;
762 temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp;
763 temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp;
764 temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp;
767 for (u32 i = 1, j = 4; i < 10; i += 1, j += 4)
770 s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^
771 s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^
772 s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^
773 s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff];
776 s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^
777 s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^
778 s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^
779 s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff];
782 s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^
783 s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^
784 s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^
785 s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff];
788 s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^
789 s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^
790 s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^
791 s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff];
795 __device__ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256])
797 u32 s0 = in[0] ^ rdk[0];
798 u32 s1 = in[1] ^ rdk[1];
799 u32 s2 = in[2] ^ rdk[2];
800 u32 s3 = in[3] ^ rdk[3];
807 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4];
808 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5];
809 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6];
810 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7];
811 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8];
812 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9];
813 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10];
814 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11];
815 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12];
816 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13];
817 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14];
818 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15];
819 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16];
820 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17];
821 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18];
822 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19];
823 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20];
824 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21];
825 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22];
826 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23];
827 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24];
828 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25];
829 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26];
830 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27];
831 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28];
832 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29];
833 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30];
834 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31];
835 s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32];
836 s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33];
837 s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34];
838 s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35];
839 t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36];
840 t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37];
841 t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38];
842 t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39];
844 out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
845 ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
846 ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
847 ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
850 out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
851 ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
852 ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
853 ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
856 out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
857 ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
858 ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
859 ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
862 out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
863 ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
864 ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
865 ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
869 __device__ static void sha1_transform (const u32x w[16], u32x digest[5])
897 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
898 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
899 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
900 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
901 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
902 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
903 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
904 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
905 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
906 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
907 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
908 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
909 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
910 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
911 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
912 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
913 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
914 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
915 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
916 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
921 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
922 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
923 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
924 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
925 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
926 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
927 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
928 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
929 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
930 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
931 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
932 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
933 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
934 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
935 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
936 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
937 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
938 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
939 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
940 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
945 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
946 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
947 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
948 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
949 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
950 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
951 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
952 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
953 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
954 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
955 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
956 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
957 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
958 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
959 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
960 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
961 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
962 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
963 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
964 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
969 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
970 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
971 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
972 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
973 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
974 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
975 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
976 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
977 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
978 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
979 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
980 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
981 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
982 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
983 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
984 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
985 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
986 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
987 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
988 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
997 extern "C" __global__ void __launch_bounds__ (256, 1) m12500_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, rar3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1003 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1005 if (gid >= gid_max) return;
1007 tmps[gid].dgst[0][0] = SHA1M_A;
1008 tmps[gid].dgst[0][1] = SHA1M_B;
1009 tmps[gid].dgst[0][2] = SHA1M_C;
1010 tmps[gid].dgst[0][3] = SHA1M_D;
1011 tmps[gid].dgst[0][4] = SHA1M_E;
1014 extern "C" __global__ void __launch_bounds__ (256, 1) m12500_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, rar3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1016 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1018 if (gid >= gid_max) return;
1022 pw_buf[0] = pws[gid].i[0];
1023 pw_buf[1] = pws[gid].i[1];
1024 pw_buf[2] = pws[gid].i[2];
1025 pw_buf[3] = pws[gid].i[3];
1026 pw_buf[4] = pws[gid].i[4];
1028 const u32 pw_len = pws[gid].pw_len;
1032 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1033 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1035 const u32 salt_len = 8;
1037 // this is large enough to hold all possible w[] arrays for 64 iterations
1039 #define LARGEBLOCK_ELEMS ((40 + 8 + 3) * 16)
1041 u32 largeblock[LARGEBLOCK_ELEMS];
1043 for (u32 i = 0; i < LARGEBLOCK_ELEMS; i++) largeblock[i] = 0;
1045 for (u32 i = 0, p = 0; i < 64; i++)
1047 for (u32 j = 0; j < pw_len; j++, p += 2)
1049 PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j));
1052 for (u32 j = 0; j < salt_len; j++, p += 1)
1054 PUTCHAR_BE (largeblock, p, GETCHAR (salt_buf, j));
1057 PUTCHAR_BE (largeblock, p + 2, (loop_pos >> 16) & 0xff);
1062 const u32 p3 = (pw_len * 2) + salt_len + 3;
1064 const u32 init_pos = loop_pos / (ROUNDS / 16);
1068 dgst[0] = tmps[gid].dgst[init_pos][0];
1069 dgst[1] = tmps[gid].dgst[init_pos][1];
1070 dgst[2] = tmps[gid].dgst[init_pos][2];
1071 dgst[3] = tmps[gid].dgst[init_pos][3];
1072 dgst[4] = tmps[gid].dgst[init_pos][4];
1074 u32 iter = loop_pos;
1076 for (u32 i = 0; i < 256; i += 4)
1078 for (u32 j = 0; j < 64; j++)
1080 const u32 p = ((j + 1) * p3) - 2;
1082 PUTCHAR_BE (largeblock, p, iter >> 8);
1085 for (u32 k = 0; k < 4; k++)
1087 for (u32 j = 0; j < 64; j++)
1089 const u32 p = ((j + 1) * p3) - 3;
1091 PUTCHAR_BE (largeblock, p, iter >> 0);
1096 for (u32 j = 0; j < p3; j++)
1098 const u32 j16 = j * 16;
1100 sha1_transform (&largeblock[j16], dgst);
1105 tmps[gid].dgst[init_pos + 1][0] = dgst[0];
1106 tmps[gid].dgst[init_pos + 1][1] = dgst[1];
1107 tmps[gid].dgst[init_pos + 1][2] = dgst[2];
1108 tmps[gid].dgst[init_pos + 1][3] = dgst[3];
1109 tmps[gid].dgst[init_pos + 1][4] = dgst[4];
1112 extern "C" __global__ void __launch_bounds__ (256, 1) m12500_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, rar3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1118 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1119 const u32 lid = threadIdx.x;
1125 __shared__ u32 s_td0[256];
1126 __shared__ u32 s_td1[256];
1127 __shared__ u32 s_td2[256];
1128 __shared__ u32 s_td3[256];
1129 __shared__ u32 s_td4[256];
1131 __shared__ u32 s_te0[256];
1132 __shared__ u32 s_te1[256];
1133 __shared__ u32 s_te2[256];
1134 __shared__ u32 s_te3[256];
1135 __shared__ u32 s_te4[256];
1137 s_td0[lid] = td0[lid];
1138 s_td1[lid] = td1[lid];
1139 s_td2[lid] = td2[lid];
1140 s_td3[lid] = td3[lid];
1141 s_td4[lid] = td4[lid];
1143 s_te0[lid] = te0[lid];
1144 s_te1[lid] = te1[lid];
1145 s_te2[lid] = te2[lid];
1146 s_te3[lid] = te3[lid];
1147 s_te4[lid] = te4[lid];
1151 if (gid >= gid_max) return;
1153 const u32 pw_len = pws[gid].pw_len;
1155 const u32 salt_len = 8;
1157 const u32 p3 = (pw_len * 2) + salt_len + 3;
1161 w_buf[ 0] = 0x80000000;
1176 w_buf[15] = (p3 * ROUNDS) * 8;
1180 dgst[0] = tmps[gid].dgst[16][0];
1181 dgst[1] = tmps[gid].dgst[16][1];
1182 dgst[2] = tmps[gid].dgst[16][2];
1183 dgst[3] = tmps[gid].dgst[16][3];
1184 dgst[4] = tmps[gid].dgst[16][4];
1186 sha1_transform (w_buf, dgst);
1192 data[0] = salt_bufs[salt_pos].salt_buf[2];
1193 data[1] = salt_bufs[salt_pos].salt_buf[3];
1194 data[2] = salt_bufs[salt_pos].salt_buf[4];
1195 data[3] = salt_bufs[salt_pos].salt_buf[5];
1199 ukeyx[0] = swap_workaround (dgst[0]);
1200 ukeyx[1] = swap_workaround (dgst[1]);
1201 ukeyx[2] = swap_workaround (dgst[2]);
1202 ukeyx[3] = swap_workaround (dgst[3]);
1204 AES128_ExpandKey (ukeyx, rk, s_te0, s_te1, s_te2, s_te3, s_te4);
1206 AES128_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
1210 AES128_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4);
1219 for (int i = 0; i < 16; i++)
1223 pw_buf[0] = pws[gid].i[0];
1224 pw_buf[1] = pws[gid].i[1];
1225 pw_buf[2] = pws[gid].i[2];
1226 pw_buf[3] = pws[gid].i[3];
1227 pw_buf[4] = pws[gid].i[4];
1229 const u32 pw_len = pws[gid].pw_len;
1233 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1234 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1236 const u32 salt_len = 8;
1238 const u32 p3 = (pw_len * 2) + salt_len + 3;
1261 for (u32 j = 0; j < pw_len; j++, p += 2)
1263 PUTCHAR_BE (w, p, GETCHAR (pw_buf, j));
1266 for (u32 j = 0; j < salt_len; j++, p += 1)
1268 PUTCHAR_BE (w, p, GETCHAR (salt_buf, j));
1271 const u32 iter_pos = i * (ROUNDS / 16);
1273 PUTCHAR_BE (w, p + 0, (iter_pos >> 0) & 0xff);
1274 PUTCHAR_BE (w, p + 1, (iter_pos >> 8) & 0xff);
1275 PUTCHAR_BE (w, p + 2, (iter_pos >> 16) & 0xff);
1277 PUTCHAR_BE (w, p3, 0x80);
1279 w[15] = ((iter_pos + 1) * p3) * 8;
1283 dgst[0] = tmps[gid].dgst[i][0];
1284 dgst[1] = tmps[gid].dgst[i][1];
1285 dgst[2] = tmps[gid].dgst[i][2];
1286 dgst[3] = tmps[gid].dgst[i][3];
1287 dgst[4] = tmps[gid].dgst[i][4];
1289 sha1_transform (w, dgst);
1291 PUTCHAR (iv, i, dgst[4] & 0xff);
1294 out[0] ^= swap_workaround (iv[0]);
1295 out[1] ^= swap_workaround (iv[1]);
1296 out[2] ^= swap_workaround (iv[2]);
1297 out[3] ^= swap_workaround (iv[3]);
1299 const u32x r0 = out[0];
1300 const u32x r1 = out[1];
1306 #include VECT_COMPARE_M