2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
// Token-pasting helpers: VTYPE(uint, 4) expands to the OpenCL vector type "uint4".
18 #define CONCAT(a, b) a##b
19 #define VTYPE(type, width) CONCAT(type, width)
// Vector typedefs whose lane count is fixed at build time by VECT_SIZE.
// NOTE(review): "uchar1" is not a valid OpenCL type name, so VECT_SIZE == 1 is
// presumably special-cased by preprocessor lines missing from this extract -- confirm.
27 typedef VTYPE(uchar, VECT_SIZE) u8x;
28 typedef VTYPE(ushort, VECT_SIZE) u16x;
29 typedef VTYPE(uint, VECT_SIZE) u32x;
30 typedef VTYPE(ulong, VECT_SIZE) u64x;
// Low 32 bits of a 64-bit value (scalar "_S" variant): plain truncating cast.
// NOTE(review): this function's braces and return statement are missing from
// this extract.
33 inline u32 l32_from_64_S (u64 a)
35 const u32 r = (u32) (a);
40 inline u32 h32_from_64_S (u64 a)
44 const u32 r = (u32) (a);
// Pack two 32-bit words into one u64: `a` becomes the high word, `b` the low
// word (component .s0 of the uint2 is the low half of the reinterpreted ulong).
49 inline u64 hl32_to_64_S (const u32 a, const u32 b)
51 return as_ulong ((uint2) (b, a));
// Vector variants that split u64x lanes into low/high u32x halves.
// NOTE(review): the bodies of l32_from_64 / h32_from_64 are not visible in
// this extract.
54 inline u32x l32_from_64 (u64x a)
93 inline u32x h32_from_64 (u64x a)
// Lane-wise pack; the groups below are presumably selected by #if VECT_SIZE
// dispatch lines that are missing from this extract -- confirm.
134 inline u64x hl32_to_64 (const u32x a, const u32x b)
139 r = as_ulong ((uint2) (b, a));
143 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
144 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
148 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
149 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
153 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
154 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
155 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
156 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
160 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
161 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
162 r.sa = as_ulong ((uint2) (b.sa, a.sa));
163 r.sb = as_ulong ((uint2) (b.sb, a.sb));
164 r.sc = as_ulong ((uint2) (b.sc, a.sc));
165 r.sd = as_ulong ((uint2) (b.sd, a.sd));
166 r.se = as_ulong ((uint2) (b.se, a.se));
167 r.sf = as_ulong ((uint2) (b.sf, a.sf));
// --- Byte-swap and rotate primitives (AMD-flavored section: uses amd_bitalign).
// Byte-reverse a 32-bit word by reinterpreting it as uchar4 and reversing lanes.
174 inline u32 swap32_S (const u32 v)
176 return (as_uint (as_uchar4 (v).s3210));
// Byte-reverse a 64-bit word the same way via uchar8.
179 inline u64 swap64_S (const u64 v)
181 return (as_ulong (as_uchar8 (v).s76543210));
// OpenCL rotate() rotates LEFT; a left rotate by (32 - n) is a right rotate by n.
184 inline u32 rotr32_S (const u32 a, const u32 n)
186 return rotate (a, 32 - n);
189 inline u32 rotl32_S (const u32 a, const u32 n)
191 return rotate (a, n);
// 64-bit right rotate built from two 32-bit funnel extracts:
// amd_bitalign (hi, lo, s) returns the low 32 bits of (hi:lo) >> (s & 31).
194 inline u64 rotr64_S (const u64 a, const u32 n)
196 const u32 a0 = h32_from_64_S (a);
197 const u32 a1 = l32_from_64_S (a);
// For n >= 32 the two halves swap roles; bitalign reduces the shift mod 32.
199 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
200 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
202 const u64 r = hl32_to_64_S (t0, t1);
// Left rotate expressed through the right-rotate helper.
207 inline u64 rotl64_S (const u64 a, const u32 n)
209 return rotr64_S (a, 64 - n);
// Lane-wise byte swap of u32x using shift-and-mask (vector-friendly form).
212 inline u32x swap32 (const u32x v)
214 return ((v >> 24) & 0x000000ff)
215 | ((v >> 8) & 0x0000ff00)
216 | ((v << 8) & 0x00ff0000)
217 | ((v << 24) & 0xff000000);
// Lane-wise byte swap of u64x: each of the eight bytes moves to its mirror slot.
220 inline u64x swap64 (const u64x v)
222 return ((v >> 56) & 0x00000000000000ff)
223 | ((v >> 40) & 0x000000000000ff00)
224 | ((v >> 24) & 0x0000000000ff0000)
225 | ((v >> 8) & 0x00000000ff000000)
226 | ((v << 8) & 0x000000ff00000000)
227 | ((v << 24) & 0x0000ff0000000000)
228 | ((v << 40) & 0x00ff000000000000)
229 | ((v << 56) & 0xff00000000000000);
// Vector counterparts of the scalar rotates above; same left-rotate-by-complement
// trick for rotr32, same amd_bitalign funnel construction for rotr64.
232 inline u32x rotr32 (const u32x a, const u32 n)
234 return rotate (a, 32 - n);
237 inline u32x rotl32 (const u32x a, const u32 n)
239 return rotate (a, n);
242 inline u64x rotr64 (const u64x a, const u32 n)
244 const u32x a0 = h32_from_64 (a);
245 const u32x a1 = l32_from_64 (a);
247 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
248 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
250 const u64x r = hl32_to_64 (t0, t1);
255 inline u64x rotl64 (const u64x a, const u32 n)
257 return rotr64 (a, 64 - n);
// Thin wrappers over the AMD media-ops intrinsics:
// __bfe(a, b, c) extracts c bits of a starting at bit position b;
// amd_bytealign_S selects 4 bytes out of the 8-byte concatenation a:b.
260 inline u32x __bfe (const u32x a, const u32x b, const u32x c)
262 return amd_bfe (a, b, c);
265 inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
267 return amd_bfe (a, b, c);
270 inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
272 return amd_bytealign (a, b, c);
// --- NVIDIA-flavored section: the same helpers implemented with inline PTX.
// NOTE(review): the duplicate definitions of swap32_S etc. imply #ifdef
// IS_AMD / IS_NV guards that are missing from this extract -- confirm.
// prmt.b32 with selector 0x0123 picks bytes 3,2,1,0 of the source: byte reverse.
277 inline u32 swap32_S (const u32 v)
281 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
// 64-bit swap: unpack into two 32-bit halves, byte-reverse each half,
// then repack with the halves exchanged (tr becomes the low half).
286 inline u64 swap64_S (const u64 v)
291 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
296 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
297 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
301 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
// rotate() rotates left; right rotation is done by the complement amount.
306 inline u32 rotr32_S (const u32 a, const u32 n)
308 return rotate (a, 32 - n);
311 inline u32 rotl32_S (const u32 a, const u32 n)
313 return rotate (a, n);
// 64-bit rotates map onto the 64-bit rotate(); "(u64) 64 - n" widens first.
316 inline u64 rotr64_S (const u64 a, const u32 n)
318 return rotate (a, (u64) 64 - n);
321 inline u64 rotl64_S (const u64 a, const u32 n)
323 return rotr64_S (a, 64 - n);
// NV vector swap/rotate: identical shift-and-mask byte swaps as the AMD
// section, but 64-bit rotates use the native 64-bit rotate() directly.
326 inline u32x swap32 (const u32x v)
328 return ((v >> 24) & 0x000000ff)
329 | ((v >> 8) & 0x0000ff00)
330 | ((v << 8) & 0x00ff0000)
331 | ((v << 24) & 0xff000000);
334 inline u64x swap64 (const u64x v)
336 return ((v >> 56) & 0x00000000000000ff)
337 | ((v >> 40) & 0x000000000000ff00)
338 | ((v >> 24) & 0x0000000000ff0000)
339 | ((v >> 8) & 0x00000000ff000000)
340 | ((v << 8) & 0x000000ff00000000)
341 | ((v << 24) & 0x0000ff0000000000)
342 | ((v << 40) & 0x00ff000000000000)
343 | ((v << 56) & 0xff00000000000000);
346 inline u32x rotr32 (const u32x a, const u32 n)
348 return rotate (a, 32 - n);
351 inline u32x rotl32 (const u32x a, const u32 n)
353 return rotate (a, n);
356 inline u64x rotr64 (const u64x a, const u32 n)
358 return rotate (a, (u64) 64 - n);
361 inline u64x rotl64 (const u64x a, const u32 n)
363 return rotate (a, (u64) n);
// __byte_perm: PTX prmt.b32 -- selector c chooses each result byte from the
// 8-byte pool formed by {a, b}. Expanded per lane for each vector width; the
// #if VECT_SIZE dispatch lines are presumably missing from this extract.
366 inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
371 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
375 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
376 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
380 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
381 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
385 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
386 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
387 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
388 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
392 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
393 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
394 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
395 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
396 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
397 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
398 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
399 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
// Scalar variant of the same byte permutation.
405 inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
409 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
// __bfe: unsigned bit-field extract via PTX bfe.u32 -- c bits of a starting at
// bit position b. Per-lane expansion mirrors __byte_perm above.
414 inline u32x __bfe (const u32x a, const u32x b, const u32x c)
419 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
423 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
424 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
428 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
429 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
433 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
434 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
435 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
436 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
440 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
441 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
442 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
443 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
444 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
445 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
446 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
447 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
// Scalar variant.
453 inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
457 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
// amd_bytealign emulation for NV: result = low 32 bits of the 64-bit
// concatenation a:b shifted right by (c & 3) bytes.
// shf.r.wrap.b32 is the funnel-shift-right instruction; the __byte_perm form
// below it is presumably a fallback for architectures without shf (the
// arch-selection #if lines are missing from this extract -- confirm).
462 inline u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
469 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
473 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(b.s0), "r"(a.s0), "r"((c.s0 & 3) * 8));
474 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(b.s1), "r"(a.s1), "r"((c.s1 & 3) * 8));
478 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(b.s2), "r"(a.s2), "r"((c.s2 & 3) * 8));
479 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(b.s3), "r"(a.s3), "r"((c.s3 & 3) * 8));
483 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(b.s4), "r"(a.s4), "r"((c.s4 & 3) * 8));
484 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(b.s5), "r"(a.s5), "r"((c.s5 & 3) * 8));
485 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(b.s6), "r"(a.s6), "r"((c.s6 & 3) * 8));
486 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(b.s7), "r"(a.s7), "r"((c.s7 & 3) * 8));
490 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(b.s8), "r"(a.s8), "r"((c.s8 & 3) * 8));
491 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(b.s9), "r"(a.s9), "r"((c.s9 & 3) * 8));
492 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(b.sa), "r"(a.sa), "r"((c.sa & 3) * 8));
493 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(b.sb), "r"(a.sb), "r"((c.sb & 3) * 8));
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(b.sc), "r"(a.sc), "r"((c.sc & 3) * 8));
495 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(b.sd), "r"(a.sd), "r"((c.sd & 3) * 8));
496 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(b.se), "r"(a.se), "r"((c.se & 3) * 8));
497 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(b.sf), "r"(a.sf), "r"((c.sf & 3) * 8));
// Fallback: build the selector 0x76543210 >> (4 * byte-offset) for prmt.
502 r = __byte_perm (b, a, ((u32x) (0x76543210) >> ((c & 3) * 4)) & 0xffff);
// Scalar variant, same two strategies.
509 inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
515 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
519 r = __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
// --- Generic (neither AMD nor NV) section: pure OpenCL C implementations of
// the same swap/rotate helpers, no vendor intrinsics or inline asm.
528 inline u32 swap32_S (const u32 v)
530 return (as_uint (as_uchar4 (v).s3210));
533 inline u64 swap64_S (const u64 v)
535 return (as_ulong (as_uchar8 (v).s76543210));
// rotate() rotates left; right rotation via the complement amount.
538 inline u32 rotr32_S (const u32 a, const u32 n)
540 return rotate (a, 32 - n);
543 inline u32 rotl32_S (const u32 a, const u32 n)
545 return rotate (a, n);
548 inline u64 rotr64_S (const u64 a, const u32 n)
550 return rotate (a, (u64) 64 - n);
553 inline u64 rotl64_S (const u64 a, const u32 n)
555 return rotate (a, (u64) n);
// Vector byte swaps by shift-and-mask, one mask per byte position.
558 inline u32x swap32 (const u32x v)
560 return ((v >> 24) & 0x000000ff)
561 | ((v >> 8) & 0x0000ff00)
562 | ((v << 8) & 0x00ff0000)
563 | ((v << 24) & 0xff000000);
566 inline u64x swap64 (const u64x v)
568 return ((v >> 56) & 0x00000000000000ff)
569 | ((v >> 40) & 0x000000000000ff00)
570 | ((v >> 24) & 0x0000000000ff0000)
571 | ((v >> 8) & 0x00000000ff000000)
572 | ((v << 8) & 0x000000ff00000000)
573 | ((v << 24) & 0x0000ff0000000000)
574 | ((v << 40) & 0x00ff000000000000)
575 | ((v << 56) & 0xff00000000000000);
578 inline u32x rotr32 (const u32x a, const u32 n)
580 return rotate (a, 32 - n);
583 inline u32x rotl32 (const u32x a, const u32 n)
585 return rotate (a, n);
588 inline u64x rotr64 (const u64x a, const u32 n)
590 return rotate (a, (u64) 64 - n);
593 inline u64x rotl64 (const u64x a, const u32 n)
595 return rotate (a, (u64) n);
// Generic bit-field extract via shift-and-mask macros: BFE(x, y, z) takes z
// bits of x starting at bit y. The macros are redefined for the scalar
// variant; matching #undef lines are presumably missing from this extract.
// NOTE(review): z == 32 would shift BIT() by the full type width (undefined
// behavior) -- callers presumably pass z < 32; confirm.
598 inline u32x __bfe (const u32x a, const u32x b, const u32x c)
600 #define BIT(x) ((u32x) (1u) << (x))
601 #define BIT_MASK(x) (BIT (x) - 1)
602 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
604 return BFE (a, b, c);
611 inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
613 #define BIT(x) (1u << (x))
614 #define BIT_MASK(x) (BIT (x) - 1)
615 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
617 return BFE (a, b, c);
// Generic amd_bytealign: widen to 64 bits with a as the high word and b as the
// low word, shift right by (c & 3) bytes, truncate back to 32 bits. Expanded
// per lane for the vector widths (VECT_SIZE dispatch lines not visible here).
624 inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
627 const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
633 const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
635 return (u32x) (tmp.s0, tmp.s1);
639 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
641 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
645 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
647 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
651 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
653 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
// Scalar variant of the same 64-bit shift construction.
657 inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
659 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
// --- Fragments of hash-mode data structures. The struct headers and most
// footers are missing from this extract, so each field below belongs to a
// different (unseen) struct; meanings inferred from names -- confirm against
// the full file before relying on them.
668 u32 digest_buf[DGST_ELEM];
// Fields of wallet / credential esalt structs (names suggest per-mode blobs).
731 u32 cry_master_buf[64];
733 u32 public_key_buf[64];
774 u32 userdomain_buf[64];
791 u32 account_info[512];
// Office-document verifier pairs; hash sizes differ per format version.
847 u32 encryptedVerifier[4];
848 u32 encryptedVerifierHash[5];
856 u32 encryptedVerifier[4];
857 u32 encryptedVerifierHash[8];
863 u32 encryptedVerifier[4];
864 u32 encryptedVerifierHash[8];
871 u32 encryptedVerifier[4];
872 u32 encryptedVerifierHash[4];
880 u32 encryptedVerifier[4];
881 u32 encryptedVerifierHash[5];
923 /* key-file handling */
927 u32 final_random_seed[8];
928 u32 transf_random_seed[8];
930 u32 contents_hash[8];
932 /* specific to version 1 */
936 /* specific to version 2 */
937 u32 expected_bytes[8];
// Closing typedef names of per-algorithm temporary-state structs.
1005 } bitcoin_wallet_tmp_t;
1103 } pbkdf2_sha1_tmp_t;
1113 } pbkdf2_sha256_tmp_t;
1123 } pbkdf2_sha512_tmp_t;
// Padding fields, presumably to satisfy a host-side struct layout -- confirm.
1343 u32 alignment_placeholder_1;
1344 u32 alignment_placeholder_2;
1345 u32 alignment_placeholder_3;
// scrypt scratch size defaults to 1 element unless overridden at build time.
1371 #ifndef SCRYPT_TMP_ELEM
1372 #define SCRYPT_TMP_ELEM 1
1375 uint4 P[SCRYPT_TMP_ELEM];