2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
21 #define COMPARE_M "check_multi_vect1_comp4.c"
24 // http://www.schneier.com/code/constants.txt
26 __constant u32 c_sbox0[256] =
28 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
29 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
30 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
31 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
32 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
33 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
34 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
35 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
36 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
37 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
38 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
39 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
40 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
41 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
42 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
43 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
44 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
45 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
46 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
47 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
48 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
49 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
50 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
51 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
52 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
53 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
54 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
55 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
56 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
57 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
58 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
59 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
60 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
61 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
62 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
63 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
64 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
65 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
66 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
67 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
68 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
69 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
70 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
71 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
72 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
73 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
74 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
75 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
76 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
77 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
78 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
79 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
80 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
81 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
82 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
83 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
84 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
85 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
86 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
87 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
88 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
89 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
90 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
91 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
94 __constant u32 c_sbox1[256] =
96 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
97 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
98 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
99 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
100 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
101 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
102 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
103 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
104 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
105 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
106 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
107 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
108 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
109 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
110 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
111 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
112 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
113 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
114 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
115 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
116 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
117 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
118 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
119 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
120 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
121 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
122 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
123 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
124 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
125 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
126 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
127 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
128 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
129 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
130 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
131 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
132 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
133 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
134 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
135 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
136 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
137 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
138 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
139 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
140 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
141 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
142 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
143 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
144 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
145 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
146 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
147 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
148 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
149 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
150 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
151 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
152 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
153 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
154 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
155 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
156 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
157 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
158 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
159 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
162 __constant u32 c_sbox2[256] =
164 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
165 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
166 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
167 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
168 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
169 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
170 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
171 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
172 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
173 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
174 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
175 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
176 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
177 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
178 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
179 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
180 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
181 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
182 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
183 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
184 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
185 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
186 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
187 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
188 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
189 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
190 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
191 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
192 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
193 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
194 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
195 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
196 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
197 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
198 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
199 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
200 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
201 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
202 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
203 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
204 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
205 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
206 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
207 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
208 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
209 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
210 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
211 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
212 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
213 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
214 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
215 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
216 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
217 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
218 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
219 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
220 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
221 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
222 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
223 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
224 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
225 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
226 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
227 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
230 __constant u32 c_sbox3[256] =
232 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
233 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
234 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
235 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
236 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
237 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
238 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
239 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
240 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
241 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
242 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
243 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
244 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
245 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
246 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
247 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
248 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
249 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
250 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
251 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
252 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
253 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
254 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
255 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
256 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
257 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
258 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
259 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
260 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
261 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
262 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
263 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
264 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
265 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
266 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
267 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
268 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
269 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
270 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
271 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
272 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
273 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
274 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
275 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
276 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
277 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
278 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
279 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
280 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
281 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
282 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
283 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
284 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
285 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
286 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
287 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
288 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
289 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
290 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
291 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
292 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
293 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
294 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
295 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
298 __constant u32 c_pbox[18] =
300 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
301 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
302 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
303 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
304 0x9216d5d9, 0x8979fb1b
307 #define BF_ROUND(L,R,N) \
309 uchar4 c = as_uchar4 ((L)); \
318 (R) ^= tmp ^ P[(N)]; \
321 #define BF_ENCRYPT(L,R) \
325 BF_ROUND (L, R, 1); \
326 BF_ROUND (R, L, 2); \
327 BF_ROUND (L, R, 3); \
328 BF_ROUND (R, L, 4); \
329 BF_ROUND (L, R, 5); \
330 BF_ROUND (R, L, 6); \
331 BF_ROUND (L, R, 7); \
332 BF_ROUND (R, L, 8); \
333 BF_ROUND (L, R, 9); \
334 BF_ROUND (R, L, 10); \
335 BF_ROUND (L, R, 11); \
336 BF_ROUND (R, L, 12); \
337 BF_ROUND (L, R, 13); \
338 BF_ROUND (R, L, 14); \
339 BF_ROUND (L, R, 15); \
340 BF_ROUND (R, L, 16); \
351 static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
379 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t);
380 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t);
381 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t);
382 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t);
383 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t);
384 SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t);
385 SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t);
386 SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t);
387 SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t);
388 SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t);
389 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t);
390 SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t);
391 SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t);
392 SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t);
393 SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t);
394 SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t);
395 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t);
396 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t);
397 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t);
398 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t);
403 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t);
404 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t);
405 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t);
406 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t);
407 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t);
408 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t);
409 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t);
410 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t);
411 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t);
412 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t);
413 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t);
414 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t);
415 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t);
416 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t);
417 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t);
418 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t);
419 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t);
420 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t);
421 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t);
422 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t);
427 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t);
428 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t);
429 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t);
430 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t);
431 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t);
432 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t);
433 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t);
434 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t);
435 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t);
436 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t);
437 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t);
438 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t);
439 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t);
440 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t);
441 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t);
442 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t);
443 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t);
444 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t);
445 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t);
446 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t);
451 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t);
452 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t);
453 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t);
454 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t);
455 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t);
456 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t);
457 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t);
458 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t);
459 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t);
460 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t);
461 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t);
462 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t);
463 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t);
464 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t);
465 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t);
466 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t);
467 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t);
468 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t);
469 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t);
470 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t);
479 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pwsafe2_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
485 const u32 lid = get_local_id (0);
486 const u32 gid = get_global_id (0);
488 if (gid >= gid_max) return;
492 w0[0] = pws[gid].i[ 0];
493 w0[1] = pws[gid].i[ 1];
494 w0[2] = pws[gid].i[ 2];
495 w0[3] = pws[gid].i[ 3];
499 w1[0] = pws[gid].i[ 4];
500 w1[1] = pws[gid].i[ 5];
501 w1[2] = pws[gid].i[ 6];
502 w1[3] = pws[gid].i[ 7];
506 w2[0] = pws[gid].i[ 8];
507 w2[1] = pws[gid].i[ 9];
508 w2[2] = pws[gid].i[10];
509 w2[3] = pws[gid].i[11];
513 w3[0] = pws[gid].i[12];
514 w3[1] = pws[gid].i[13];
515 w3[2] = pws[gid].i[14];
516 w3[3] = pws[gid].i[15];
518 const u32 pw_len = pws[gid].pw_len;
520 append_0x80_4 (w0, w1, w2, w3, pw_len);
526 const u32 salt_len = salt_bufs[salt_pos].salt_len;
530 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
531 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
537 w3[1] = w2[3] << 16 | w2[2] >> 16;
538 w3[0] = w2[2] << 16 | w2[1] >> 16;
539 w2[3] = w2[1] << 16 | w2[0] >> 16;
540 w2[2] = w2[0] << 16 | w1[3] >> 16;
541 w2[1] = w1[3] << 16 | w1[2] >> 16;
542 w2[0] = w1[2] << 16 | w1[1] >> 16;
543 w1[3] = w1[1] << 16 | w1[0] >> 16;
544 w1[2] = w1[0] << 16 | w0[3] >> 16;
545 w1[1] = w0[3] << 16 | w0[2] >> 16;
546 w1[0] = w0[2] << 16 | w0[1] >> 16;
547 w0[3] = w0[1] << 16 | w0[0] >> 16;
552 w0[0] = swap_workaround (w0[0]);
553 w0[1] = swap_workaround (w0[1]);
554 w0[2] = swap_workaround (w0[2]);
555 w0[3] = swap_workaround (w0[3]);
556 w1[0] = swap_workaround (w1[0]);
557 w1[1] = swap_workaround (w1[1]);
558 w1[2] = swap_workaround (w1[2]);
559 w1[3] = swap_workaround (w1[3]);
560 w2[0] = swap_workaround (w2[0]);
561 w2[1] = swap_workaround (w2[1]);
562 w2[2] = swap_workaround (w2[2]);
563 w2[3] = swap_workaround (w2[3]);
564 w3[0] = swap_workaround (w3[0]);
565 w3[1] = swap_workaround (w3[1]);
567 const u32 block_len = salt_len + 2 + pw_len;
570 w3[3] = block_len * 8;
580 sha1_transform (w0, w1, w2, w3, digest);
588 for (u32 i = 0; i < 18; i++)
593 __local u32 S0_all[8][256];
594 __local u32 S1_all[8][256];
595 __local u32 S2_all[8][256];
596 __local u32 S3_all[8][256];
598 __local u32 *S0 = S0_all[lid];
599 __local u32 *S1 = S1_all[lid];
600 __local u32 *S2 = S2_all[lid];
601 __local u32 *S3 = S3_all[lid];
603 for (u32 i = 0; i < 256; i++)
611 for (u32 i = 0; i < 18; i++)
613 P[i] ^= digest[i % 5];
619 for (u32 i = 0; i < 18; i += 2)
627 for (u32 i = 0; i < 256; i += 4)
640 for (u32 i = 0; i < 256; i += 4)
653 for (u32 i = 0; i < 256; i += 4)
666 for (u32 i = 0; i < 256; i += 4)
681 tmps[gid].digest[0] = salt_buf[0];
682 tmps[gid].digest[1] = salt_buf[1];
684 for (u32 i = 0; i < 18; i++)
686 tmps[gid].P[i] = P[i];
689 for (u32 i = 0; i < 256; i++)
691 tmps[gid].S0[i] = S0[i];
692 tmps[gid].S1[i] = S1[i];
693 tmps[gid].S2[i] = S2[i];
694 tmps[gid].S3[i] = S3[i];
698 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pwsafe2_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
704 const u32 gid = get_global_id (0);
706 if (gid >= gid_max) return;
708 const u32 lid = get_local_id (0);
714 digest[0] = tmps[gid].digest[0];
715 digest[1] = tmps[gid].digest[1];
719 for (u32 i = 0; i < 18; i++)
721 P[i] = tmps[gid].P[i];
724 __local u32 S0_all[8][256];
725 __local u32 S1_all[8][256];
726 __local u32 S2_all[8][256];
727 __local u32 S3_all[8][256];
729 __local u32 *S0 = S0_all[lid];
730 __local u32 *S1 = S1_all[lid];
731 __local u32 *S2 = S2_all[lid];
732 __local u32 *S3 = S3_all[lid];
734 for (u32 i = 0; i < 256; i++)
736 S0[i] = tmps[gid].S0[i];
737 S1[i] = tmps[gid].S1[i];
738 S2[i] = tmps[gid].S2[i];
739 S3[i] = tmps[gid].S3[i];
747 for (u32 i = 0; i < loop_cnt; i++)
754 tmps[gid].digest[0] = L0;
755 tmps[gid].digest[1] = R0;
758 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global pwsafe2_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
764 const u32 gid = get_global_id (0);
766 if (gid >= gid_max) return;
768 const u32 lid = get_local_id (0);
774 digest[0] = tmps[gid].digest[0];
775 digest[1] = tmps[gid].digest[1];
784 w0[0] = swap_workaround (digest[0]);
785 w0[1] = swap_workaround (digest[1]);
803 out[0] = 0; // yep, not a bug! context is zero here
809 sha1_transform (w0, w1, w2, w3, out);
811 const u32 r0 = out[0];
812 const u32 r1 = out[1];
813 const u32 r2 = out[2];
814 const u32 r3 = out[3];