Tor 0.4.9.0-alpha-dev
keccak-tiny-unrolled.c
1/** libkeccak-tiny
2 *
3 * A single-file implementation of SHA-3 and SHAKE.
4 *
5 * Implementor: David Leon Gil
6 * License: CC0, attribution kindly requested. Blame taken too,
7 * but not liability.
8 */
9#include "keccak-tiny.h"
10
11#include <string.h>
13#include "byteorder.h"
14
15/******** Endianness conversion helpers ********/
16
17static inline uint64_t
18loadu64le(const unsigned char *x) {
19 uint64_t r;
20 memcpy(&r, x, sizeof(r));
21 return _le64toh(r);
22}
23
24static inline void
25storeu64le(uint8_t *x, uint64_t u) {
26 uint64_t val = _le64toh(u);
27 memcpy(x, &val, sizeof(u));
28}
29
30/******** The Keccak-f[1600] permutation ********/
31
32/*** Constants. ***/
33static const uint8_t rho[24] = \
34 { 1, 3, 6, 10, 15, 21,
35 28, 36, 45, 55, 2, 14,
36 27, 41, 56, 8, 25, 43,
37 62, 18, 39, 61, 20, 44};
38static const uint8_t pi[24] = \
39 {10, 7, 11, 17, 18, 3,
40 5, 16, 8, 21, 24, 4,
41 15, 23, 19, 13, 12, 2,
42 20, 14, 22, 9, 6, 1};
43static const uint64_t RC[24] = \
44 {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
45 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
46 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
47 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
48 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
49 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
50
51/*** Helper macros to unroll the permutation. ***/
52#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
53#define REPEAT6(e) e e e e e e
54#define REPEAT24(e) REPEAT6(e e e e)
55#define REPEAT5(e) e e e e e
56#define FOR5(v, s, e) \
57 v = 0; \
58 REPEAT5(e; v += s;)
59
60/*** Keccak-f[1600] ***/
61static inline void keccakf(void* state) {
62 uint64_t* a = (uint64_t*)state;
63 uint64_t b[5] = {0};
64 uint64_t t = 0;
65 uint8_t x, y, i = 0;
66
67 REPEAT24(
68 // Theta
69 FOR5(x, 1,
70 b[x] = 0;
71 FOR5(y, 5,
72 b[x] ^= a[x + y]; ))
73 FOR5(x, 1,
74 FOR5(y, 5,
75 a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
76 // Rho and pi
77 t = a[1];
78 x = 0;
79 REPEAT24(b[0] = a[pi[x]];
80 a[pi[x]] = rol(t, rho[x]);
81 t = b[0];
82 x++; )
83 // Chi
84 FOR5(y,
85 5,
86 FOR5(x, 1,
87 b[x] = a[y + x];)
88 FOR5(x, 1,
89 a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
90 // Iota
91 a[0] ^= RC[i];
92 i++; )
93}
94
95/******** The FIPS202-defined functions. ********/
96
97/*** Some helper macros. ***/
98
99// `xorin` modified to handle Big Endian systems, `buf` being unaligned on
100// systems that care about such things. Assumes that len is a multiple of 8,
101// which is always true for the rates we use, and the modified finalize.
102static inline void
103xorin8(uint8_t *dst, const uint8_t *src, size_t len) {
104 uint64_t* a = (uint64_t*)dst; // Always aligned.
105 for (size_t i = 0; i < len; i += 8) {
106 a[i/8] ^= loadu64le(src + i);
107 }
108}
109
110// `setout` likewise modified to handle Big Endian systems. Assumes that len
111// is a multiple of 8, which is true for every rate we use.
112static inline void
113setout8(const uint8_t *src, uint8_t *dst, size_t len) {
114 const uint64_t *si = (const uint64_t*)src; // Always aligned.
115 for (size_t i = 0; i < len; i+= 8) {
116 storeu64le(dst+i, si[i/8]);
117 }
118}
119
120#define P keccakf
121#define Plen KECCAK_MAX_RATE
122
123#define KECCAK_DELIM_DIGEST 0x06
124#define KECCAK_DELIM_XOF 0x1f
125
126// Fold P*F over the full blocks of an input.
127#define foldP(I, L, F) \
128 while (L >= s->rate) { \
129 F(s->a, I, s->rate); \
130 P(s->a); \
131 I += s->rate; \
132 L -= s->rate; \
133 }
134
135static inline void
136keccak_absorb_blocks(keccak_state *s, const uint8_t *buf, size_t nr_blocks)
137{
138 size_t blen = nr_blocks * s->rate;
139 foldP(buf, blen, xorin8);
140}
141
142static int
143keccak_update(keccak_state *s, const uint8_t *buf, size_t len)
144{
145 if (s->finalized)
146 return -1;
147 if ((buf == NULL) && len != 0)
148 return -1;
149
150 size_t remaining = len;
151 while (remaining > 0) {
152 if (s->offset == 0) {
153 const size_t blocks = remaining / s->rate;
154 size_t direct_bytes = blocks * s->rate;
155 if (direct_bytes > 0) {
156 keccak_absorb_blocks(s, buf, blocks);
157 remaining -= direct_bytes;
158 buf += direct_bytes;
159 }
160 }
161
162 const size_t buf_avail = s->rate - s->offset;
163 const size_t buf_bytes = (buf_avail > remaining) ? remaining : buf_avail;
164 if (buf_bytes > 0) {
165 memcpy(&s->block[s->offset], buf, buf_bytes);
166 s->offset += buf_bytes;
167 remaining -= buf_bytes;
168 buf += buf_bytes;
169 }
170 if (s->offset == s->rate) {
171 keccak_absorb_blocks(s, s->block, 1);
172 s->offset = 0;
173 }
174 }
175 return 0;
176}
177
178static void
179keccak_finalize(keccak_state *s)
180{
181 // Xor in the DS and pad frame.
182 s->block[s->offset++] = s->delim; // DS.
183 for (size_t i = s->offset; i < s->rate; i++) {
184 s->block[i] = 0;
185 }
186 s->block[s->rate - 1] |= 0x80; // Pad frame.
187
188 // Xor in the last block.
189 xorin8(s->a, s->block, s->rate);
190
191 memwipe(s->block, 0, sizeof(s->block));
192 s->finalized = 1;
193 s->offset = s->rate;
194}
195
196static inline void
197keccak_squeeze_blocks(keccak_state *s, uint8_t *out, size_t nr_blocks)
198{
199 for (size_t n = 0; n < nr_blocks; n++) {
200 keccakf(s->a);
201 setout8(s->a, out, s->rate);
202 out += s->rate;
203 }
204}
205
206static int
207keccak_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
208{
209 if (!s->finalized)
210 return -1;
211
212 size_t remaining = outlen;
213 while (remaining > 0) {
214 if (s->offset == s->rate) {
215 const size_t blocks = remaining / s->rate;
216 const size_t direct_bytes = blocks * s->rate;
217 if (blocks > 0) {
218 keccak_squeeze_blocks(s, out, blocks);
219 out += direct_bytes;
220 remaining -= direct_bytes;
221 }
222
223 if (remaining > 0) {
224 keccak_squeeze_blocks(s, s->block, 1);
225 s->offset = 0;
226 }
227 }
228
229 const size_t buf_bytes = s->rate - s->offset;
230 const size_t indirect_bytes = (buf_bytes > remaining) ? remaining : buf_bytes;
231 if (indirect_bytes > 0) {
232 memcpy(out, &s->block[s->offset], indirect_bytes);
233 out += indirect_bytes;
234 s->offset += indirect_bytes;
235 remaining -= indirect_bytes;
236 }
237 }
238 return 0;
239}
240
241int
242keccak_digest_init(keccak_state *s, size_t bits)
243{
244 if (s == NULL)
245 return -1;
246 if (bits != 224 && bits != 256 && bits != 384 && bits != 512)
247 return -1;
248
249 keccak_cleanse(s);
250 s->rate = KECCAK_RATE(bits);
251 s->delim = KECCAK_DELIM_DIGEST;
252 return 0;
253}
254
255int
256keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len)
257{
258 if (s == NULL)
259 return -1;
260 if (s->delim != KECCAK_DELIM_DIGEST)
261 return -1;
262
263 return keccak_update(s, buf, len);
264}
265
266int
267keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen)
268{
269 if (s == NULL)
270 return -1;
271 if (s->delim != KECCAK_DELIM_DIGEST)
272 return -1;
273 if (out == NULL || outlen > 4 * (KECCAK_MAX_RATE - s->rate) / 8)
274 return -1;
275
276 // Work in a copy so that incremental/rolling hashes are easy.
277 keccak_state s_tmp;
278 keccak_clone(&s_tmp, s);
279 keccak_finalize(&s_tmp);
280 int ret = keccak_squeeze(&s_tmp, out, outlen);
281 keccak_cleanse(&s_tmp);
282 return ret;
283}
284
285int
286keccak_xof_init(keccak_state *s, size_t bits)
287{
288 if (s == NULL)
289 return -1;
290 if (bits != 128 && bits != 256)
291 return -1;
292
293 keccak_cleanse(s);
294 s->rate = KECCAK_RATE(bits);
295 s->delim = KECCAK_DELIM_XOF;
296 return 0;
297}
298
299int
300keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len)
301{
302 if (s == NULL)
303 return -1;
304 if (s->delim != KECCAK_DELIM_XOF)
305 return -1;
306
307 return keccak_update(s, buf, len);
308}
309
310int
311keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
312{
313 if (s == NULL)
314 return -1;
315 if (s->delim != KECCAK_DELIM_XOF)
316 return -1;
317
318 if (!s->finalized)
319 keccak_finalize(s);
320
321 return keccak_squeeze(s, out, outlen);
322}
323
324void
325keccak_clone(keccak_state *out, const keccak_state *in)
326{
327 memcpy(out, in, sizeof(keccak_state));
328}
329
330void
331keccak_cleanse(keccak_state *s)
332{
333 memwipe(s, 0, sizeof(keccak_state));
334}
335
336/** The sponge-based hash construction. **/
337static inline int hash(uint8_t* out, size_t outlen,
338 const uint8_t* in, size_t inlen,
339 size_t bits, uint8_t delim) {
340 if ((out == NULL) || ((in == NULL) && inlen != 0)) {
341 return -1;
342 }
343
344 int ret = 0;
345 keccak_state s;
346 keccak_cleanse(&s);
347
348 switch (delim) {
349 case KECCAK_DELIM_DIGEST:
350 ret |= keccak_digest_init(&s, bits);
351 ret |= keccak_digest_update(&s, in, inlen);
352 // Use the internal API instead of sum to avoid the memcpy.
353 keccak_finalize(&s);
354 ret |= keccak_squeeze(&s, out, outlen);
355 break;
356 case KECCAK_DELIM_XOF:
357 ret |= keccak_xof_init(&s, bits);
358 ret |= keccak_xof_absorb(&s, in, inlen);
359 ret |= keccak_xof_squeeze(&s, out, outlen);
360 break;
361 default:
362 return -1;
363 }
364 keccak_cleanse(&s);
365 return ret;
366}
367
368/*** Helper macros to define SHA3 and SHAKE instances. ***/
369#define defshake(bits) \
370 int shake##bits(uint8_t* out, size_t outlen, \
371 const uint8_t* in, size_t inlen) { \
372 return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_XOF); \
373 }
374#define defsha3(bits) \
375 int sha3_##bits(uint8_t* out, size_t outlen, \
376 const uint8_t* in, size_t inlen) { \
377 if (outlen > (bits/8)) { \
378 return -1; \
379 } \
380 return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST); \
381 }
382
383/*** FIPS202 SHAKE VOFs ***/
384defshake(128)
385defshake(256)
386
387/*** FIPS202 SHA3 FOFs ***/
388defsha3(224)
389defsha3(256)
390defsha3(384)
391defsha3(512)
void memwipe(void *mem, uint8_t byte, size_t sz)
Definition: crypto_util.c:55
Common functions for cryptographic routines.