Tor 0.4.9.2-alpha-dev
All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
polyval.c
1/* Copyright (c) 2025, The Tor Project, Inc. */
2/* See LICENSE for licensing information */
3
4/**
5 * \file polyval.h
6 * \brief Implementation for polyval universal hash function.
7 *
8 * Polyval, which was first defined for AES-GCM-SIV, is a
9 * universal hash based on multiplication in GF(2^128).
10 * Unlike the more familiar GHASH, it is defined to work on
11 * little-endian inputs, and so is more straightforward and efficient
12 * on little-endian architectures.
13 *
14 * In Tor we use it as part of the Counter Galois Onion relay
15 * encryption format.
16 **/
17
18/* Implementation notes:
19 *
20 * Our implementation is based on the GHASH code from BearSSL
21 * by Thomas Pornin. There are three implementations:
22 *
23 * pclmul.c -- An x86-only version, based on the CLMUL instructions
24 * introduced for westlake processors in 2010.
25 *
26 * ctmul64.c -- A portable contant-time implementation for 64-bit
27 * processors.
28 *
29 * ctmul.c -- A portable constant-time implementation for 32-bit
30 * processors with an efficient 32x32->64 multiply instruction.
31 *
32 * Note that the "ctmul" implementations are only constant-time
33 * if the corresponding CPU multiply and rotate instructions are
34 * also constant-time. But that's an architectural assumption we
35 * make in Tor.
36 */
37
38#include "ext/polyval/polyval.h"
39
40#include <string.h>
41
42#ifdef PV_USE_PCLMUL_DETECT
43#include <cpuid.h>
44#endif
45
46typedef pv_u128_ u128;
47
48/* ========
49 * We declare these functions, to help implement polyval.
50 *
51 * They have different definitions depending on our representation
52 * of 128-bit integers.
53 */
54#if 0
55/**
56 * Read a u128-bit little-endian integer from 'bytes',
57 * which may not be aligned.
58 */
59static inline u128 u128_from_bytes(const uint8_t *bytes);
60/**
61 * Store a u128-bit little-endian integer to 'bytes_out',
62 * which may not be aligned.
63 */
64static inline void u128_to_bytes(u128, uint8_t *bytes_out);
65/**
66 * XOR a u128 into the y field of a polyval_t struct.
67 *
68 * (Within the polyval struct, perform "y ^= v").
69 */
70static inline void pv_xor_y(polyval_t *, u128 v);
71
72/* ========
73 * The function which we expect our backend to implement.
74 */
75/**
76 * Within the polyval struct, perform "y *= h".
77 *
78 * (This is a carryless multiply in the Polyval galois field)
79 */
80static void pv_mul_y_h(polyval_t *);h
81#endif
82
83/* =====
84 * Endianness conversion for big-endian platforms
85 */
86#ifdef WORDS_BIG_ENDIAN
87#ifdef __GNUC__
88#define bswap64(x) __builtin_bswap64(x)
89#define bswap32(x) __builtin_bswap32(x)
90#else
91/* The (compiler should optimize these into a decent bswap instruction) */
92static inline uint64_t
93bswap64(uint64_t v)
94{
95 return
96 ((value & 0xFF00000000000000) >> 56) |
97 ((value & 0x00FF000000000000) >> 48) |
98 ((value & 0x0000FF0000000000) >> 40) |
99 ((value & 0x000000FF00000000) >> 32) |
100 ((value & 0x00000000FF000000) >> 24) |
101 ((value & 0x0000000000FF0000) >> 16) |
102 ((value & 0x000000000000FF00) >> 8) |
103 ((value & 0x00000000000000FF));
104}
105static inline uint64_t
106bswap32(uint64_t v)
107{
108 return
109 ((value & 0xFF000000) >> 24) |
110 ((value & 0x00FF0000) >> 16) |
111 ((value & 0x0000FF00) >> 8) |
112 ((value & 0x000000FF));
113}
114#endif
115#endif
116
117#ifdef WORDS_BIG_ENDIAN
118#define convert_byte_order64(x) bswap64(x)
119#define convert_byte_order32(x) bswap32(x)
120#else
121#define convert_byte_order64(x) (x)
122#define convert_byte_order32(x) (x)
123#endif
124
125#if defined PV_USE_PCLMUL_UNCONDITIONAL
126#define PCLMUL_MEMBER(v) (v)
127#define PV_USE_PCLMUL
128
129#elif defined PV_USE_PCLMUL_DETECT
130#define PCLMUL_MEMBER(v) (v).u128x1
131#define CTMUL64_MEMBER(v) (v).u64x2
132#define PV_USE_PCLMUL
133#define PV_USE_CTMUL64
134
135#elif defined PV_USE_CTMUL64
136#define CTMUL64_MEMBER(v) (v)
137#endif
138
139#ifdef PV_USE_PCLMUL
140#include "ext/polyval/pclmul.c"
141
142static inline u128
143u128_from_bytes_pclmul(const uint8_t *bytes)
144{
145 u128 r;
146 PCLMUL_MEMBER(r) = _mm_loadu_si128((const __m128i*)bytes);
147 return r;
148}
149static inline void
150u128_to_bytes_pclmul(u128 val, uint8_t *bytes_out)
151{
152 _mm_storeu_si128((__m128i*)bytes_out, PCLMUL_MEMBER(val));
153}
154static inline void
155pv_xor_y_pclmul(polyval_t *pv, u128 v)
156{
157 PCLMUL_MEMBER(pv->y) = _mm_xor_si128(PCLMUL_MEMBER(pv->y),
158 PCLMUL_MEMBER(v));
159}
160#endif
161
162#if defined(PV_USE_CTMUL64)
163#include "ext/polyval/ctmul64.c"
164
165static inline u128
166u128_from_bytes_ctmul64(const uint8_t *bytes)
167{
168 u128 r;
169 memcpy(&CTMUL64_MEMBER(r).lo, bytes, 8);
170 memcpy(&CTMUL64_MEMBER(r).hi, bytes + 8, 8);
171 CTMUL64_MEMBER(r).lo = convert_byte_order64(CTMUL64_MEMBER(r).lo);
172 CTMUL64_MEMBER(r).hi = convert_byte_order64(CTMUL64_MEMBER(r).hi);
173 return r;
174}
175static inline void
176u128_to_bytes_ctmul64(u128 val, uint8_t *bytes_out)
177{
178 uint64_t lo = convert_byte_order64(CTMUL64_MEMBER(val).lo);
179 uint64_t hi = convert_byte_order64(CTMUL64_MEMBER(val).hi);
180 memcpy(bytes_out, &lo, 8);
181 memcpy(bytes_out + 8, &hi, 8);
182}
183static inline void
184pv_xor_y_ctmul64(polyval_t *pv, u128 val)
185{
186 CTMUL64_MEMBER(pv->y).lo ^= CTMUL64_MEMBER(val).lo;
187 CTMUL64_MEMBER(pv->y).hi ^= CTMUL64_MEMBER(val).hi;
188}
189#endif
190
191#if defined(PV_USE_CTMUL)
192#include "ext/polyval/ctmul.c"
193
194static inline u128
195u128_from_bytes_ctmul(const uint8_t *bytes)
196{
197 u128 r;
198 memcpy(&r.v, bytes, 16);
199 for (int i = 0; i < 4; ++i) {
200 r.v[i] = convert_byte_order32(r.v[i]);
201 }
202 return r;
203}
204static inline void
205u128_to_bytes_ctmul(u128 val, uint8_t *bytes_out)
206{
207 uint32_t v[4];
208 for (int i = 0; i < 4; ++i) {
209 v[i] = convert_byte_order32(val.v[i]);
210 }
211 memcpy(bytes_out, v, 16);
212}
213static inline void
214pv_xor_y_ctmul(polyval_t *pv, u128 val)
215{
216 for (int i = 0; i < 4; ++i) {
217 pv->y.v[i] ^= val.v[i];
218 }
219}
220#endif
221
223static inline void add_multiple_none(polyval_t *pv,
224 const uint8_t *input,
225 const struct expanded_key_none *expanded)
226{
227 (void) pv;
228 (void) input;
229 (void) expanded;
230}
231static inline void expand_key_none(const polyval_t *inp,
232 struct expanded_key_none *out)
233{
234 (void) inp;
235 (void) out;
236}
237
238/* Kludge: a special value to use for block_stride when we don't support
239 * processing multiple blocks at once. Previously we used 0, but that
240 * caused warnings with some comparisons. */
241#define BLOCK_STRIDE_NONE 0xffff
242
243#define PV_DECLARE(prefix, \
244 st, \
245 u128_from_bytes, \
246 u128_to_bytes, \
247 pv_xor_y, \
248 pv_mul_y_h, \
249 block_stride, \
250 expanded_key_tp, expand_fn, add_multiple_fn) \
251 st void \
252 prefix ## polyval_key_init(polyval_key_t *pvk, const uint8_t *key) \
253 { \
254 pvk->h = u128_from_bytes(key); \
255 } \
256 st void \
257 prefix ## polyval_init(polyval_t *pv, const uint8_t *key) \
258 { \
259 polyval_key_init(&pv->key, key); \
260 memset(&pv->y, 0, sizeof(u128)); \
261 } \
262 st void \
263 prefix ## polyval_init_from_key(polyval_t *pv, const polyval_key_t *key) \
264 { \
265 memcpy(&pv->key, key, sizeof(polyval_key_t)); \
266 memset(&pv->y, 0, sizeof(u128)); \
267 } \
268 st void \
269 prefix ## polyval_add_block(polyval_t *pv, const uint8_t *block) \
270 { \
271 u128 b = u128_from_bytes(block); \
272 pv_xor_y(pv, b); \
273 pv_mul_y_h(pv); \
274 } \
275 st void \
276 prefix ## polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n) \
277 { \
278 /* since block_stride is a constant, this should get optimized */ \
279 if ((block_stride != BLOCK_STRIDE_NONE) \
280 && n >= (block_stride) * 16) { \
281 expanded_key_tp expanded_key; \
282 expand_fn(pv, &expanded_key); \
283 while (n >= (block_stride) * 16) { \
284 add_multiple_fn(pv, data, &expanded_key); \
285 n -= block_stride*16; \
286 data += block_stride * 16; \
287 } \
288 } \
289 while (n > 16) { \
290 polyval_add_block(pv, data); \
291 data += 16; \
292 n -= 16; \
293 } \
294 if (n) { \
295 uint8_t block[16]; \
296 memset(&block, 0, sizeof(block)); \
297 memcpy(block, data, n); \
298 polyval_add_block(pv, block); \
299 } \
300 } \
301 st void \
302 prefix ## polyval_get_tag(const polyval_t *pv, uint8_t *tag_out) \
303 { \
304 u128_to_bytes(pv->y, tag_out); \
305 } \
306 st void \
307 prefix ## polyval_reset(polyval_t *pv) \
308 { \
309 memset(&pv->y, 0, sizeof(u128)); \
310 }
311
312#ifdef PV_USE_PCLMUL_DETECT
313/* We use a boolean to distinguish whether to use the PCLMUL instructions,
314 * but instead we could use function pointers. It's probably worth
315 * benchmarking, though it's unlikely to make a measurable difference.
316 */
317static bool use_pclmul = false;
318
319/* Declare _both_ variations of our code, statically,
320 * with different prefixes. */
321PV_DECLARE(pclmul_, static,
322 u128_from_bytes_pclmul,
323 u128_to_bytes_pclmul,
324 pv_xor_y_pclmul,
325 pv_mul_y_h_pclmul,
326 PV_BLOCK_STRIDE,
327 pv_expanded_key_t,
328 expand_key_pclmul,
329 pv_add_multiple_pclmul)
330
331PV_DECLARE(ctmul64_, static,
332 u128_from_bytes_ctmul64,
333 u128_to_bytes_ctmul64,
334 pv_xor_y_ctmul64,
335 pv_mul_y_h_ctmul64,
336 BLOCK_STRIDE_NONE,
337 struct expanded_key_none,
338 expand_key_none,
339 add_multiple_none)
340
341void
342polyval_key_init(polyval_key_t *pv, const uint8_t *key)
343{
344 if (use_pclmul)
345 pclmul_polyval_key_init(pv, key);
346 else
347 ctmul64_polyval_key_init(pv, key);
348}
349void
350polyval_init(polyval_t *pv, const uint8_t *key)
351{
352 if (use_pclmul)
353 pclmul_polyval_init(pv, key);
354 else
355 ctmul64_polyval_init(pv, key);
356}
357void
359{
360 if (use_pclmul)
361 pclmul_polyval_init_from_key(pv, key);
362 else
363 ctmul64_polyval_init_from_key(pv, key);
364}
365void
366polyval_add_block(polyval_t *pv, const uint8_t *block)
367{
368 if (use_pclmul)
369 pclmul_polyval_add_block(pv, block);
370 else
371 ctmul64_polyval_add_block(pv, block);
372}
373void
374polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n)
375{
376 if (use_pclmul)
377 pclmul_polyval_add_zpad(pv, data, n);
378 else
379 ctmul64_polyval_add_zpad(pv, data, n);
380}
381void
382polyval_get_tag(const polyval_t *pv, uint8_t *tag_out)
383{
384 if (use_pclmul)
385 pclmul_polyval_get_tag(pv, tag_out);
386 else
387 ctmul64_polyval_get_tag(pv, tag_out);
388}
389void
391{
392 if (use_pclmul)
393 pclmul_polyval_reset(pv);
394 else
395 ctmul64_polyval_reset(pv);
396}
397
398#elif defined(PV_USE_PCLMUL)
399PV_DECLARE(, ,
400 u128_from_bytes_pclmul,
401 u128_to_bytes_pclmul,
402 pv_xor_y_pclmul,
403 pv_mul_y_h_pclmul,
404 PV_BLOCK_STRIDE,
405 pv_expanded_key_t,
406 expand_key_pclmul,
407 pv_add_multiple_pclmul)
408#elif defined(PV_USE_CTMUL64)
409PV_DECLARE(, ,
410 u128_from_bytes_ctmul64,
411 u128_to_bytes_ctmul64,
412 pv_xor_y_ctmul64,
413 pv_mul_y_h_ctmul64,
414 BLOCK_STRIDE_NONE,
415 struct expanded_key_none,
416 expand_key_none,
417 add_multiple_none)
418
419#elif defined(PV_USE_CTMUL)
420PV_DECLARE(, , u128_from_bytes_ctmul,
421 u128_to_bytes_ctmul,
422 pv_xor_y_ctmul,
423 pv_mul_y_h_ctmul,
424 BLOCK_STRIDE_NONE,
425 struct expanded_key_none,
426 expand_key_none,
427 add_multiple_none)
428#endif
429
430#ifdef PV_USE_PCLMUL_DETECT
431void
433{
434 unsigned int eax, ebx, ecx, edx;
435 use_pclmul = false;
436 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
437 if (0 != (ecx & bit_PCLMUL)) {
438 use_pclmul = true;
439 }
440 }
441}
442#else
443void
445{
446}
447#endif
448
449#ifdef POLYVAL_USE_EXPANDED_KEYS
450
451#ifdef PV_USE_PCLMUL_DETECT
452#define SHOULD_EXPAND() (use_pclmul)
453#else
454#define SHOULD_EXPAND() (1)
455#endif
456
457void
458polyvalx_init(polyvalx_t *pvx, const uint8_t *key)
459{
460 polyval_init(&pvx->pv, key);
461 if (SHOULD_EXPAND()) {
462 expand_key_pclmul(&pvx->pv, &pvx->expanded);
463 }
464}
465void
466polyvalx_init_from_key(polyvalx_t *pvx, const polyval_key_t *key)
467{
468 polyval_init_from_key(&pvx->pv, key);
469 if (SHOULD_EXPAND()) {
470 expand_key_pclmul(&pvx->pv, &pvx->expanded);
471 }
472}
473void
474polyvalx_add_block(polyvalx_t *pvx, const uint8_t *block)
475{
476 polyval_add_block(&pvx->pv, block);
477}
478void
479polyvalx_add_zpad(polyvalx_t *pvx, const uint8_t *data, size_t n)
480{
481 if (SHOULD_EXPAND() && n >= PV_BLOCK_STRIDE * 16) {
482 while (n > PV_BLOCK_STRIDE * 16) {
483 pv_add_multiple_pclmul(&pvx->pv, data, &pvx->expanded);
484 data += PV_BLOCK_STRIDE * 16;
485 n -= PV_BLOCK_STRIDE * 16;
486 }
487 }
488 while (n > 16) {
489 polyval_add_block(&pvx->pv, data);
490 data += 16;
491 n -= 16;
492 }
493 if (n) {
494 uint8_t block[16];
495 memset(&block, 0, sizeof(block));
496 memcpy(block, data, n);
497 polyval_add_block(&pvx->pv, block);
498 }
499}
500void
501polyvalx_get_tag(const polyvalx_t *pvx, uint8_t *tag_out)
502{
503 polyval_get_tag(&pvx->pv, tag_out);
504}
505void polyvalx_reset(polyvalx_t *pvx)
506{
507 polyval_reset(&pvx->pv);
508}
509#endif
510
511#if 0
512#include <stdio.h>
513int
514main(int c, char **v)
515{
516 // From RFC 8452 appendix A
517 uint8_t key[16] =
518 { 0x25, 0x62, 0x93, 0x47, 0x58, 0x92, 0x42, 0x76,
519 0x1d, 0x31, 0xf8, 0x26, 0xba, 0x4b, 0x75, 0x7b };
520 uint8_t block1[16] =
521 { 0x4f, 0x4f, 0x95, 0x66, 0x8c, 0x83, 0xdf, 0xb6,
522 0x40, 0x17, 0x62, 0xbb, 0x2d, 0x01, 0xa2, 0x62 };
523 uint8_t block2[16] = {
524 0xd1, 0xa2, 0x4d, 0xdd, 0x27, 0x21, 0xd0, 0x06,
525 0xbb, 0xe4, 0x5f, 0x20, 0xd3, 0xc9, 0xf3, 0x62 };
526 uint8_t expect_result[16] = {
527 0xf7, 0xa3, 0xb4, 0x7b, 0x84, 0x61, 0x19, 0xfa,
528 0xe5, 0xb7, 0x86, 0x6c, 0xf5, 0xe5, 0xb7, 0x7e };
529
530 polyval_t pv;
531 uint8_t tag[16];
532 polyval_init(&pv, key);
533 polyval_add_block(&pv, block1);
534 polyval_add_block(&pv, block2);
535 polyval_get_tag(&pv, tag);
536 if (!memcmp(expect_result, tag, 16)) {
537 puts("OK");
538 return 0;
539 } else {
540 puts("NOPE");
541 return 1;
542 }
543}
544#endif
Implementation for polyval universal hash function.
void polyval_reset(polyval_t *)
void polyval_init(polyval_t *, const uint8_t *key)
void polyval_add_zpad(polyval_t *, const uint8_t *data, size_t n)
void polyval_add_block(polyval_t *, const uint8_t *block)
void polyval_get_tag(const polyval_t *, uint8_t *tag_out)
void polyval_init_from_key(polyval_t *, const polyval_key_t *key)
void polyval_detect_implementation(void)
Definition: polyval.c:444
void polyval_key_init(polyval_key_t *, const uint8_t *key)
pv_u128_ y
Definition: polyval.h:94
int main(int argc, char *argv[])
Definition: tor_main.c:25