2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "inc_vendor.cl"
9 #include "inc_hash_constants.h"
10 #include "inc_hash_functions.cl"
11 #include "inc_types.cl"
12 #include "inc_common.cl"
14 #define COMPARE_S "inc_comp_single.cl"
15 #define COMPARE_M "inc_comp_multi.cl"
17 // http://www.schneier.com/code/constants.txt
19 __constant u32 c_sbox0[256] =
21 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
22 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
23 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
24 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
25 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
26 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
27 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
28 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
29 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
30 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
31 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
32 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
33 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
34 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
35 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
36 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
37 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
38 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
39 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
40 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
41 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
42 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
43 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
44 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
45 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
46 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
47 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
48 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
49 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
50 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
51 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
52 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
53 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
54 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
55 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
56 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
57 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
58 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
59 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
60 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
61 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
62 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
63 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
64 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
65 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
66 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
67 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
68 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
69 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
70 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
71 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
72 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
73 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
74 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
75 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
76 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
77 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
78 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
79 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
80 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
81 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
82 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
83 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
84 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
87 __constant u32 c_sbox1[256] =
89 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
90 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
91 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
92 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
93 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
94 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
95 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
96 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
97 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
98 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
99 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
100 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
101 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
102 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
103 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
104 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
105 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
106 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
107 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
108 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
109 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
110 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
111 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
112 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
113 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
114 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
115 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
116 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
117 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
118 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
119 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
120 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
121 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
122 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
123 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
124 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
125 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
126 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
127 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
128 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
129 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
130 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
131 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
132 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
133 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
134 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
135 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
136 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
137 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
138 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
139 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
140 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
141 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
142 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
143 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
144 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
145 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
146 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
147 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
148 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
149 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
150 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
151 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
152 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
155 __constant u32 c_sbox2[256] =
157 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
158 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
159 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
160 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
161 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
162 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
163 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
164 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
165 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
166 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
167 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
168 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
169 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
170 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
171 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
172 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
173 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
174 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
175 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
176 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
177 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
178 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
179 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
180 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
181 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
182 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
183 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
184 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
185 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
186 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
187 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
188 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
189 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
190 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
191 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
192 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
193 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
194 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
195 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
196 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
197 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
198 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
199 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
200 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
201 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
202 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
203 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
204 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
205 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
206 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
207 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
208 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
209 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
210 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
211 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
212 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
213 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
214 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
215 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
216 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
217 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
218 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
219 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
220 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
223 __constant u32 c_sbox3[256] =
225 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
226 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
227 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
228 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
229 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
230 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
231 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
232 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
233 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
234 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
235 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
236 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
237 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
238 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
239 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
240 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
241 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
242 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
243 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
244 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
245 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
246 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
247 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
248 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
249 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
250 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
251 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
252 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
253 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
254 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
255 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
256 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
257 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
258 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
259 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
260 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
261 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
262 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
263 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
264 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
265 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
266 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
267 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
268 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
269 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
270 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
271 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
272 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
273 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
274 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
275 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
276 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
277 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
278 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
279 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
280 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
281 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
282 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
283 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
284 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
285 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
286 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
287 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
288 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
292 #define BF_ROUND(L,R,N) \
294 uchar4 c = as_uchar4 ((L)); \
303 (R) ^= tmp ^ P[(N)]; \
308 #define BF_ROUND(L,R,N) \
312 tmp = S0[__bfe ((L), 24, 8)]; \
313 tmp += S1[__bfe ((L), 16, 8)]; \
314 tmp ^= S2[__bfe ((L), 8, 8)]; \
315 tmp += S3[__bfe ((L), 0, 8)]; \
317 (R) ^= tmp ^ P[(N)]; \
322 #define BF_ROUND(L,R,N) \
324 uchar4 c = as_uchar4 ((L)); \
333 (R) ^= tmp ^ P[(N)]; \
337 #define BF_ENCRYPT(L,R) \
341 BF_ROUND (L, R, 1); \
342 BF_ROUND (R, L, 2); \
343 BF_ROUND (L, R, 3); \
344 BF_ROUND (R, L, 4); \
345 BF_ROUND (L, R, 5); \
346 BF_ROUND (R, L, 6); \
347 BF_ROUND (L, R, 7); \
348 BF_ROUND (R, L, 8); \
349 BF_ROUND (L, R, 9); \
350 BF_ROUND (R, L, 10); \
351 BF_ROUND (L, R, 11); \
352 BF_ROUND (R, L, 12); \
353 BF_ROUND (L, R, 13); \
354 BF_ROUND (R, L, 14); \
355 BF_ROUND (L, R, 15); \
356 BF_ROUND (R, L, 16); \
367 void expand_key (u32 E[18], const u32 W[16], const u32 len)
369 u8 *E_cur = (u8 *) E;
370 u8 *E_stop = E_cur + 72;
372 while (E_cur < E_stop)
374 u8 *W_cur = (u8 *) W;
375 u8 *W_stop = W_cur + len;
377 while (W_cur < W_stop)
386 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
392 const u32 gid = get_global_id (0);
393 const u32 lid = get_local_id (0);
395 if (gid >= gid_max) return;
397 const u32 pw_len = pws[gid].pw_len;
401 w[ 0] = pws[gid].i[ 0];
402 w[ 1] = pws[gid].i[ 1];
403 w[ 2] = pws[gid].i[ 2];
404 w[ 3] = pws[gid].i[ 3];
405 w[ 4] = pws[gid].i[ 4];
406 w[ 5] = pws[gid].i[ 5];
407 w[ 6] = pws[gid].i[ 6];
408 w[ 7] = pws[gid].i[ 7];
409 w[ 8] = pws[gid].i[ 8];
410 w[ 9] = pws[gid].i[ 9];
411 w[10] = pws[gid].i[10];
412 w[11] = pws[gid].i[11];
413 w[12] = pws[gid].i[12];
414 w[13] = pws[gid].i[13];
415 w[14] = pws[gid].i[14];
416 w[15] = pws[gid].i[15];
420 expand_key (E, w, pw_len);
422 E[ 0] = swap32 (E[ 0]);
423 E[ 1] = swap32 (E[ 1]);
424 E[ 2] = swap32 (E[ 2]);
425 E[ 3] = swap32 (E[ 3]);
426 E[ 4] = swap32 (E[ 4]);
427 E[ 5] = swap32 (E[ 5]);
428 E[ 6] = swap32 (E[ 6]);
429 E[ 7] = swap32 (E[ 7]);
430 E[ 8] = swap32 (E[ 8]);
431 E[ 9] = swap32 (E[ 9]);
432 E[10] = swap32 (E[10]);
433 E[11] = swap32 (E[11]);
434 E[12] = swap32 (E[12]);
435 E[13] = swap32 (E[13]);
436 E[14] = swap32 (E[14]);
437 E[15] = swap32 (E[15]);
438 E[16] = swap32 (E[16]);
439 E[17] = swap32 (E[17]);
441 for (u32 i = 0; i < 18; i++)
443 tmps[gid].E[i] = E[i];
452 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
453 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
454 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
455 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
461 __local u32 S0_all[8][256];
462 __local u32 S1_all[8][256];
463 __local u32 S2_all[8][256];
464 __local u32 S3_all[8][256];
466 __local u32 *S0 = S0_all[lid];
467 __local u32 *S1 = S1_all[lid];
468 __local u32 *S2 = S2_all[lid];
469 __local u32 *S3 = S3_all[lid];
475 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
476 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
477 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
478 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
479 0x9216d5d9, 0x8979fb1b
482 for (u32 i = 0; i < 256; i++)
492 for (u32 i = 0; i < 18; i++)
500 for (u32 i = 0; i < 18; i += 2)
502 L0 ^= salt_buf[(i & 2) + 0];
503 R0 ^= salt_buf[(i & 2) + 1];
511 for (u32 i = 0; i < 256; i += 4)
530 for (u32 i = 0; i < 256; i += 4)
549 for (u32 i = 0; i < 256; i += 4)
568 for (u32 i = 0; i < 256; i += 4)
589 for (u32 i = 0; i < 18; i++)
591 tmps[gid].P[i] = P[i];
594 for (u32 i = 0; i < 256; i++)
596 tmps[gid].S0[i] = S0[i];
597 tmps[gid].S1[i] = S1[i];
598 tmps[gid].S2[i] = S2[i];
599 tmps[gid].S3[i] = S3[i];
603 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
609 const u32 gid = get_global_id (0);
610 const u32 lid = get_local_id (0);
612 if (gid >= gid_max) return;
618 for (u32 i = 0; i < 18; i++)
620 E[i] = tmps[gid].E[i];
625 for (u32 i = 0; i < 18; i++)
627 P[i] = tmps[gid].P[i];
630 __local u32 S0_all[8][256];
631 __local u32 S1_all[8][256];
632 __local u32 S2_all[8][256];
633 __local u32 S3_all[8][256];
635 __local u32 *S0 = S0_all[lid];
636 __local u32 *S1 = S1_all[lid];
637 __local u32 *S2 = S2_all[lid];
638 __local u32 *S3 = S3_all[lid];
640 for (u32 i = 0; i < 256; i++)
642 S0[i] = tmps[gid].S0[i];
643 S1[i] = tmps[gid].S1[i];
644 S2[i] = tmps[gid].S2[i];
645 S3[i] = tmps[gid].S3[i];
654 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
655 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
656 salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
657 salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
666 for (u32 i = 0; i < loop_cnt; i++)
668 for (u32 i = 0; i < 18; i++)
679 for (u32 i = 0; i < 9; i++)
687 for (u32 i = 0; i < 256; i += 2)
695 for (u32 i = 0; i < 256; i += 2)
703 for (u32 i = 0; i < 256; i += 2)
711 for (u32 i = 0; i < 256; i += 2)
719 P[ 0] ^= salt_buf[0];
720 P[ 1] ^= salt_buf[1];
721 P[ 2] ^= salt_buf[2];
722 P[ 3] ^= salt_buf[3];
723 P[ 4] ^= salt_buf[0];
724 P[ 5] ^= salt_buf[1];
725 P[ 6] ^= salt_buf[2];
726 P[ 7] ^= salt_buf[3];
727 P[ 8] ^= salt_buf[0];
728 P[ 9] ^= salt_buf[1];
729 P[10] ^= salt_buf[2];
730 P[11] ^= salt_buf[3];
731 P[12] ^= salt_buf[0];
732 P[13] ^= salt_buf[1];
733 P[14] ^= salt_buf[2];
734 P[15] ^= salt_buf[3];
735 P[16] ^= salt_buf[0];
736 P[17] ^= salt_buf[1];
744 for (u32 i = 0; i < 9; i++)
752 for (u32 i = 0; i < 256; i += 2)
760 for (u32 i = 0; i < 256; i += 2)
768 for (u32 i = 0; i < 256; i += 2)
776 for (u32 i = 0; i < 256; i += 2)
787 for (u32 i = 0; i < 18; i++)
789 tmps[gid].P[i] = P[i];
792 for (u32 i = 0; i < 256; i++)
794 tmps[gid].S0[i] = S0[i];
795 tmps[gid].S1[i] = S1[i];
796 tmps[gid].S2[i] = S2[i];
797 tmps[gid].S3[i] = S3[i];
801 __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global bcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
807 const u32 gid = get_global_id (0);
808 const u32 lid = get_local_id (0);
810 if (gid >= gid_max) return;
816 for (u32 i = 0; i < 18; i++)
818 P[i] = tmps[gid].P[i];
821 __local u32 S0_all[8][256];
822 __local u32 S1_all[8][256];
823 __local u32 S2_all[8][256];
824 __local u32 S3_all[8][256];
826 __local u32 *S0 = S0_all[lid];
827 __local u32 *S1 = S1_all[lid];
828 __local u32 *S2 = S2_all[lid];
829 __local u32 *S3 = S3_all[lid];
831 for (u32 i = 0; i < 256; i++)
833 S0[i] = tmps[gid].S0[i];
834 S1[i] = tmps[gid].S1[i];
835 S2[i] = tmps[gid].S2[i];
836 S3[i] = tmps[gid].S3[i];
849 for (u32 i = 0; i < 64; i++)
860 for (u32 i = 0; i < 64; i++)
872 f &= ~0xff; // its just 23 not 24 !