2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 // http://www.schneier.com/code/constants.txt
38 __constant u32 c_sbox0[256] =
40 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
41 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
42 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
43 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
44 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
45 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
46 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
47 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
48 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
49 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
50 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
51 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
52 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
53 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
54 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
55 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
56 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
57 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
58 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
59 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
60 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
61 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
62 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
63 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
64 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
65 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
66 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
67 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
68 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
69 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
70 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
71 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
72 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
73 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
74 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
75 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
76 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
77 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
78 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
79 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
80 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
81 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
82 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
83 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
84 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
85 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
86 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
87 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
88 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
89 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
90 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
91 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
92 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
93 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
94 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
95 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
96 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
97 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
98 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
99 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
100 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
101 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
102 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
103 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
106 __constant u32 c_sbox1[256] =
108 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
109 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
110 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
111 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
112 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
113 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
114 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
115 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
116 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
117 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
118 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
119 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
120 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
121 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
122 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
123 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
124 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
125 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
126 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
127 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
128 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
129 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
130 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
131 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
132 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
133 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
134 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
135 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
136 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
137 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
138 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
139 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
140 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
141 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
142 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
143 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
144 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
145 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
146 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
147 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
148 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
149 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
150 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
151 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
152 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
153 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
154 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
155 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
156 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
157 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
158 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
159 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
160 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
161 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
162 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
163 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
164 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
165 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
166 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
167 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
168 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
169 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
170 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
171 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
174 __constant u32 c_sbox2[256] =
176 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
177 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
178 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
179 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
180 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
181 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
182 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
183 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
184 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
185 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
186 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
187 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
188 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
189 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
190 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
191 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
192 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
193 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
194 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
195 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
196 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
197 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
198 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
199 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
200 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
201 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
202 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
203 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
204 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
205 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
206 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
207 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
208 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
209 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
210 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
211 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
212 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
213 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
214 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
215 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
216 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
217 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
218 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
219 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
220 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
221 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
222 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
223 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
224 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
225 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
226 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
227 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
228 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
229 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
230 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
231 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
232 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
233 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
234 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
235 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
236 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
237 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
238 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
239 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
242 __constant u32 c_sbox3[256] =
244 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
245 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
246 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
247 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
248 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
249 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
250 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
251 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
252 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
253 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
254 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
255 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
256 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
257 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
258 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
259 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
260 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
261 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
262 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
263 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
264 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
265 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
266 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
267 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
268 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
269 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
270 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
271 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
272 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
273 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
274 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
275 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
276 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
277 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
278 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
279 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
280 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
281 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
282 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
283 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
284 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
285 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
286 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
287 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
288 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
289 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
290 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
291 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
292 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
293 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
294 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
295 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
296 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
297 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
298 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
299 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
300 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
301 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
302 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
303 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
304 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
305 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
306 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
307 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
310 __constant u32 c_pbox[18] =
312 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
313 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
314 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
315 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
316 0x9216d5d9, 0x8979fb1b
319 #define BF_ROUND(L,R,N) \
321 uchar4 c = as_uchar4 ((L)); \
330 (R) ^= tmp ^ P[(N)]; \
333 #define BF_ENCRYPT(L,R) \
337 BF_ROUND (L, R, 1); \
338 BF_ROUND (R, L, 2); \
339 BF_ROUND (L, R, 3); \
340 BF_ROUND (R, L, 4); \
341 BF_ROUND (L, R, 5); \
342 BF_ROUND (R, L, 6); \
343 BF_ROUND (L, R, 7); \
344 BF_ROUND (R, L, 8); \
345 BF_ROUND (L, R, 9); \
346 BF_ROUND (R, L, 10); \
347 BF_ROUND (L, R, 11); \
348 BF_ROUND (R, L, 12); \
349 BF_ROUND (L, R, 13); \
350 BF_ROUND (R, L, 14); \
351 BF_ROUND (L, R, 15); \
352 BF_ROUND (R, L, 16); \
363 static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5])
391 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
392 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
393 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
394 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
395 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
396 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
397 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
398 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
399 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
400 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
401 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
402 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
403 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
404 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
405 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
406 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
407 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
408 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
409 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
410 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
415 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
416 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
417 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
418 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
419 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
420 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
421 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
422 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
423 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
424 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
425 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
426 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
427 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
428 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
429 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
430 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
431 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
432 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
433 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
434 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
439 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
440 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
441 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
442 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
443 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
444 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
445 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
446 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
447 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
448 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
449 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
450 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
451 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
452 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
453 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
454 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
455 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
456 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
457 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
458 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
463 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
464 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
465 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
466 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
467 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
468 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
469 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
470 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
471 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
472 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
473 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
474 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
475 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
476 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
477 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
478 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
479 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
480 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
481 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
482 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
491 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pwsafe2_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
497 const u32 lid = get_local_id (0);
498 const u32 gid = get_global_id (0);
500 if (gid >= gid_max) return;
504 w0[0] = pws[gid].i[ 0];
505 w0[1] = pws[gid].i[ 1];
506 w0[2] = pws[gid].i[ 2];
507 w0[3] = pws[gid].i[ 3];
511 w1[0] = pws[gid].i[ 4];
512 w1[1] = pws[gid].i[ 5];
513 w1[2] = pws[gid].i[ 6];
514 w1[3] = pws[gid].i[ 7];
518 w2[0] = pws[gid].i[ 8];
519 w2[1] = pws[gid].i[ 9];
520 w2[2] = pws[gid].i[10];
521 w2[3] = pws[gid].i[11];
525 w3[0] = pws[gid].i[12];
526 w3[1] = pws[gid].i[13];
527 w3[2] = pws[gid].i[14];
528 w3[3] = pws[gid].i[15];
530 const u32 pw_len = pws[gid].pw_len;
532 append_0x80_4 (w0, w1, w2, w3, pw_len);
538 const u32 salt_len = salt_bufs[salt_pos].salt_len;
542 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
543 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
549 w3[1] = w2[3] << 16 | w2[2] >> 16;
550 w3[0] = w2[2] << 16 | w2[1] >> 16;
551 w2[3] = w2[1] << 16 | w2[0] >> 16;
552 w2[2] = w2[0] << 16 | w1[3] >> 16;
553 w2[1] = w1[3] << 16 | w1[2] >> 16;
554 w2[0] = w1[2] << 16 | w1[1] >> 16;
555 w1[3] = w1[1] << 16 | w1[0] >> 16;
556 w1[2] = w1[0] << 16 | w0[3] >> 16;
557 w1[1] = w0[3] << 16 | w0[2] >> 16;
558 w1[0] = w0[2] << 16 | w0[1] >> 16;
559 w0[3] = w0[1] << 16 | w0[0] >> 16;
564 w0[0] = swap_workaround (w0[0]);
565 w0[1] = swap_workaround (w0[1]);
566 w0[2] = swap_workaround (w0[2]);
567 w0[3] = swap_workaround (w0[3]);
568 w1[0] = swap_workaround (w1[0]);
569 w1[1] = swap_workaround (w1[1]);
570 w1[2] = swap_workaround (w1[2]);
571 w1[3] = swap_workaround (w1[3]);
572 w2[0] = swap_workaround (w2[0]);
573 w2[1] = swap_workaround (w2[1]);
574 w2[2] = swap_workaround (w2[2]);
575 w2[3] = swap_workaround (w2[3]);
576 w3[0] = swap_workaround (w3[0]);
577 w3[1] = swap_workaround (w3[1]);
579 const u32 block_len = salt_len + 2 + pw_len;
582 w3[3] = block_len * 8;
592 sha1_transform (w0, w1, w2, w3, digest);
600 for (u32 i = 0; i < 18; i++)
605 __local u32x S0_all[8][256];
606 __local u32x S1_all[8][256];
607 __local u32x S2_all[8][256];
608 __local u32x S3_all[8][256];
610 __local u32x *S0 = S0_all[lid];
611 __local u32x *S1 = S1_all[lid];
612 __local u32x *S2 = S2_all[lid];
613 __local u32x *S3 = S3_all[lid];
615 for (u32 i = 0; i < 256; i++)
623 for (u32 i = 0; i < 18; i++)
625 P[i] ^= digest[i % 5];
631 for (u32 i = 0; i < 18; i += 2)
639 for (u32 i = 0; i < 256; i += 4)
652 for (u32 i = 0; i < 256; i += 4)
665 for (u32 i = 0; i < 256; i += 4)
678 for (u32 i = 0; i < 256; i += 4)
693 tmps[gid].digest[0] = salt_buf[0];
694 tmps[gid].digest[1] = salt_buf[1];
696 for (u32 i = 0; i < 18; i++)
698 tmps[gid].P[i] = P[i];
701 for (u32 i = 0; i < 256; i++)
703 tmps[gid].S0[i] = S0[i];
704 tmps[gid].S1[i] = S1[i];
705 tmps[gid].S2[i] = S2[i];
706 tmps[gid].S3[i] = S3[i];
710 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pwsafe2_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
716 const u32 gid = get_global_id (0);
718 if (gid >= gid_max) return;
720 const u32 lid = get_local_id (0);
726 digest[0] = tmps[gid].digest[0];
727 digest[1] = tmps[gid].digest[1];
731 for (u32 i = 0; i < 18; i++)
733 P[i] = tmps[gid].P[i];
736 __local u32x S0_all[8][256];
737 __local u32x S1_all[8][256];
738 __local u32x S2_all[8][256];
739 __local u32x S3_all[8][256];
741 __local u32x *S0 = S0_all[lid];
742 __local u32x *S1 = S1_all[lid];
743 __local u32x *S2 = S2_all[lid];
744 __local u32x *S3 = S3_all[lid];
746 for (u32 i = 0; i < 256; i++)
748 S0[i] = tmps[gid].S0[i];
749 S1[i] = tmps[gid].S1[i];
750 S2[i] = tmps[gid].S2[i];
751 S3[i] = tmps[gid].S3[i];
759 for (u32 i = 0; i < loop_cnt; i++)
766 tmps[gid].digest[0] = L0;
767 tmps[gid].digest[1] = R0;
770 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pwsafe2_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
776 const u32 gid = get_global_id (0);
778 if (gid >= gid_max) return;
780 const u32 lid = get_local_id (0);
786 digest[0] = tmps[gid].digest[0];
787 digest[1] = tmps[gid].digest[1];
796 w0[0] = swap_workaround (digest[0]);
797 w0[1] = swap_workaround (digest[1]);
815 out[0] = 0; // yep, not a bug! context is zero here
821 sha1_transform (w0, w1, w2, w3, out);
823 const u32x r0 = out[0];
824 const u32x r1 = out[1];
825 const u32x r2 = out[2];
826 const u32x r3 = out[3];
830 #include VECT_COMPARE_M