/*
 * Carry-less multiplication of two 64-bit words, i.e. multiplication of
 * polynomials over GF(2). Only the low 64 bits of the product are
 * returned.
 *
 * Each operand is split into four words that keep one bit out of four
 * (bit positions congruent to 0, 1, 2 and 3 modulo 4). When two such
 * words are multiplied as plain integers, fewer than 16 partial products
 * meet at any data position below bit 60, so their sum fits in the
 * four-bit slot starting at that position and never disturbs the next
 * data bit. A position can collect 16 partial products only at bit 60 or
 * above, and the resulting carry then lies at bit 64 or above, where the
 * unsigned arithmetic discards it. The lowest bit of each slot is thus
 * the XOR of the partial products, which is the carry-less result; the
 * final masks remove the carry residue left in the gaps.
 *
 * Only AND, XOR, OR and integer multiplication are used: no branch and
 * no memory access depends on the operand values.
 *
 * x, y: the two operands.
 * Returns the low 64 bits of the carry-less product of x and y.
 */
uint64_t
bmul64(uint64_t x, uint64_t y)
{
	uint64_t x0, x1, x2, x3;
	uint64_t y0, y1, y2, y3;
	uint64_t z0, z1, z2, z3;

	/* Split both operands by bit position modulo 4. */
	x0 = x & (uint64_t)0x1111111111111111;
	x1 = x & (uint64_t)0x2222222222222222;
	x2 = x & (uint64_t)0x4444444444444444;
	x3 = x & (uint64_t)0x8888888888888888;
	y0 = y & (uint64_t)0x1111111111111111;
	y1 = y & (uint64_t)0x2222222222222222;
	y2 = y & (uint64_t)0x4444444444444444;
	y3 = y & (uint64_t)0x8888888888888888;

	/*
	 * zk gathers every product xi * yj with (i + j) mod 4 == k: those
	 * are the ones whose data bits land on positions congruent to k
	 * modulo 4.
	 */
	z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
	z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
	z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
	z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);

	/* Keep the data bits only; the gaps hold carry residue. */
	z0 &= (uint64_t)0x1111111111111111;
	z1 &= (uint64_t)0x2222222222222222;
	z2 &= (uint64_t)0x4444444444444444;
	z3 &= (uint64_t)0x8888888888888888;

	return z0 | z1 | z2 | z3;
}
/*
 * Bit reversal of the 64-bit value held in x.
 *
 * RMS(m, s) replaces x with the bits selected by mask m moved up by s
 * positions, combined with the bits s positions above them moved down
 * into the mask: each group of s bits is swapped with its neighbour.
 * Applying it with s = 1, 2, 4, 8 and 16 and then swapping the two
 * 32-bit halves reverses the order of all 64 bits.
 *
 * NOTE(review): the enclosing function header and the macro's closing
 * "} while (0)" are not visible in this extract. This is presumably the
 * body of the rev64() helper called further down; confirm against the
 * complete file.
 */
59#define RMS(m, s) do { \
60 x = ((x & (uint64_t)(m)) << (s)) \
61 | ((x >> (s)) & (uint64_t)(m)); \
64 RMS(0x5555555555555555, 1);
65 RMS(0x3333333333333333, 2);
66 RMS(0x0F0F0F0F0F0F0F0F, 4);
67 RMS(0x00FF00FF00FF00FF, 8);
68 RMS(0x0000FFFF0000FFFF, 16);
/* Last step: exchange the upper and lower 32-bit halves. */
69 return (x << 32) | (x >> 32);
/*
 * NOTE(review): this span is a gapped extract of a larger function. Its
 * header, several assignments and its closing brace are not visible, so
 * the comments below describe only what the visible lines do.
 */
/* Working words for the key halves and the values derived from them. */
79 uint64_t h0, h1, h2, h0r, h1r, h2r;
/* Load the low and high 64-bit halves of the accumulator pv->y. */
81 y0 = CTMUL64_MEMBER(pv->
y).lo;
82 y1 = CTMUL64_MEMBER(pv->
y).hi;
/* Load the low and high 64-bit halves of the key pv->key.h. */
83 h0 = CTMUL64_MEMBER(pv->
key.h).lo;
84 h1 = CTMUL64_MEMBER(pv->
key.h).hi;
/* Temporaries for operand variants, partial products and the wide result. */
91 uint64_t y0r, y1r, y2, y2r;
92 uint64_t z0, z1, z2, z0h, z1h, z2h;
93 uint64_t v0, v1, v2, v3;
/*
 * Multiply the "r" operands pairwise with bmul64().
 * NOTE(review): y0r/y1r/y2r and h0r/h1r/h2r are presumably bit-reversed
 * copies of the operands; their assignments are not visible here.
 */
103 z0h = bmul64(y0r, h0r);
104 z1h = bmul64(y1r, h1r);
105 z2h = bmul64(y2r, h2r);
/*
 * Reverse the bits of each product and shift it right by one.
 * NOTE(review): presumably this recovers the upper 64 bits of each
 * carry-less product, which bmul64() does not return directly; confirm.
 */
108 z0h = rev64(z0h) >> 1;
109 z1h = rev64(z1h) >> 1;
110 z2h = rev64(z2h) >> 1;
/*
 * Shift the multi-word value v3:v2:v1 left by one bit; the top bit of
 * each lower word is carried into the bottom of the next one. The
 * matching update of v0 is not visible in this extract.
 */
119 v3 = (v3 << 1) | (v2 >> 63);
120 v2 = (v2 << 1) | (v1 >> 63);
121 v1 = (v1 << 1) | (v0 >> 63);
/*
 * Fold v0 into v2 and v1, then fold the updated v1 into v3 and v2, using
 * right shifts by 1, 2 and 7 and the complementary left shifts by 63, 62
 * and 57.
 * NOTE(review): these shift amounts look like reduction modulo the GCM
 * polynomial x^128 + x^7 + x^2 + x + 1; confirm against the spec.
 */
125 v2 ^= v0 ^ (v0 >> 1) ^ (v0 >> 2) ^ (v0 >> 7);
126 v1 ^= (v0 << 63) ^ (v0 << 62) ^ (v0 << 57);
127 v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7);
128 v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57);
/* Store v2 and v3 back as the low and high halves of pv->y. */
130 CTMUL64_MEMBER(pv->
y).lo = v2;
131 CTMUL64_MEMBER(pv->
y).hi = v3;