Tor 0.4.9.0-alpha-dev
binascii.c
Go to the documentation of this file.
1/* Copyright (c) 2001, Matej Pfajfar.
2 * Copyright (c) 2001-2004, Roger Dingledine.
3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 * Copyright (c) 2007-2021, The Tor Project, Inc. */
5/* See LICENSE for licensing information */
6
7/**
8 * \file binascii.c
9 *
10 * \brief Miscellaneous functions for encoding and decoding various things
11 * in base{16,32,64}.
12 */
13
#include "orconfig.h"

#include "lib/encoding/binascii.h"
#include "lib/log/log.h"
#include "lib/log/util_bug.h"
#include "lib/cc/torint.h"
#include "lib/string/compat_ctype.h"
#include "lib/intmath/muldiv.h"
#include "lib/malloc/malloc.h"

#include <stddef.h>
#include <string.h>
#include <stdlib.h>
27
/** Hex-encode the leading bytes of <b>from</b> and return a pointer to the
 * resulting NUL-terminated string.
 *
 * At most 32 bytes are encoded: a larger <b>fromlen</b> is silently clamped
 * to the capacity of the internal buffer.
 *
 * The returned pointer refers to a single function-local static buffer, so
 * the caller must not free it, and every call overwrites the result of the
 * previous one.
 */
const char *
hex_str(const char *from, size_t fromlen)
{
  static char hexbuf[65];
  /* Two hex digits per input byte, plus one byte reserved for the NUL. */
  const size_t max_bytes = (sizeof(hexbuf) - 1) / 2;
  const size_t n_bytes = (fromlen > max_bytes) ? max_bytes : fromlen;

  base16_encode(hexbuf, sizeof(hexbuf), from, n_bytes);
  return hexbuf;
}
42
43/* Return the base32 encoded size in bytes using the source length srclen.
44 *
45 * (WATCH OUT: This API counts the terminating NUL byte, but
46 * base64_encode_size does not.)
47 */
48size_t
49base32_encoded_size(size_t srclen)
50{
51 size_t enclen;
52 tor_assert(srclen < SIZE_T_CEILING / 8);
53 enclen = BASE32_NOPAD_BUFSIZE(srclen);
54 tor_assert(enclen < INT_MAX && enclen > srclen);
55 return enclen;
56}
57
58/** Implements base32 encoding as in RFC 4648. */
59void
60base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
61{
62 unsigned int i, v, u;
63 size_t nbits = srclen * 8;
64 size_t bit;
65
66 /* We need enough space for the encoded data and the extra NUL byte. */
67 tor_assert(base32_encoded_size(srclen) <= destlen);
68 tor_assert(destlen < SIZE_T_CEILING);
69
70 /* Make sure we leave no uninitialized data in the destination buffer. */
71 memset(dest, 0, destlen);
72
73 for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
74 /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
75 size_t idx = bit / 8;
76 v = ((uint8_t)src[idx]) << 8;
77 if (idx+1 < srclen)
78 v += (uint8_t)src[idx+1];
79 /* set u to the 5-bit value at the bit'th bit of buf. */
80 u = (v >> (11-(bit%8))) & 0x1F;
81 dest[i] = BASE32_CHARS[u];
82 }
83 dest[i] = '\0';
84}
85
86/** Implements base32 decoding as in RFC 4648.
87 * Return the number of bytes decoded if successful; -1 otherwise.
88 */
89int
90base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
91{
92 /* XXXX we might want to rewrite this along the lines of base64_decode, if
93 * it ever shows up in the profile. */
94 unsigned int i;
95 size_t nbits, j, bit;
96 char *tmp;
97 nbits = ((srclen * 5) / 8) * 8;
98
99 tor_assert(srclen < SIZE_T_CEILING / 5);
100 tor_assert((nbits/8) <= destlen); /* We need enough space. */
101 tor_assert(destlen < SIZE_T_CEILING);
102
103 /* Make sure we leave no uninitialized data in the destination buffer. */
104 memset(dest, 0, destlen);
105
106 /* Convert base32 encoded chars to the 5-bit values that they represent. */
107 tmp = tor_malloc_zero(srclen);
108 for (j = 0; j < srclen; ++j) {
109 if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
110 else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
111 else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
112 else {
113 log_warn(LD_GENERAL, "illegal character in base32 encoded string");
114 tor_free(tmp);
115 return -1;
116 }
117 }
118
119 /* Assemble result byte-wise by applying five possible cases. */
120 for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
121 switch (bit % 40) {
122 case 0:
123 dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
124 (((uint8_t)tmp[(bit/5)+1]) >> 2);
125 break;
126 case 8:
127 dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
128 (((uint8_t)tmp[(bit/5)+1]) << 1) +
129 (((uint8_t)tmp[(bit/5)+2]) >> 4);
130 break;
131 case 16:
132 dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
133 (((uint8_t)tmp[(bit/5)+1]) >> 1);
134 break;
135 case 24:
136 dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
137 (((uint8_t)tmp[(bit/5)+1]) << 2) +
138 (((uint8_t)tmp[(bit/5)+2]) >> 3);
139 break;
140 case 32:
141 dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
142 ((uint8_t)tmp[(bit/5)+1]);
143 break;
144 }
145 }
146
147 memset(tmp, 0, srclen); /* on the heap, this should be safe */
148 tor_free(tmp);
149 tmp = NULL;
150 return i;
151}
152
153#define BASE64_OPENSSL_LINELEN 64
154
155/** Return the Base64 encoded size of <b>srclen</b> bytes of data in
156 * bytes.
157 *
158 * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
159 * but base32_encoded_size does.)
160 *
161 * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
162 * of the encoded output as multiline output (64 character, `\n' terminated
163 * lines).
164 */
165size_t
166base64_encode_size(size_t srclen, int flags)
167{
168 size_t enclen;
169
170 /* Use INT_MAX for overflow checking because base64_encode() returns int. */
171 tor_assert(srclen < INT_MAX);
172 tor_assert(CEIL_DIV(srclen, 3) < INT_MAX / 4);
173
174 enclen = BASE64_LEN(srclen);
175 if (flags & BASE64_ENCODE_MULTILINE)
176 enclen += CEIL_DIV(enclen, BASE64_OPENSSL_LINELEN);
177
178 tor_assert(enclen < INT_MAX && (enclen == 0 || enclen > srclen));
179 return enclen;
180}
181
/** Return an upper bound on the number of bytes that might be needed to hold
 * the data from decoding a base64 string of length <b>srclen</b>. This is
 * only an upper bound, since some part of the base64 string might be padding
 * or space.
 *
 * Asserts that <b>srclen</b> is below INT_MAX / 3, so that the
 * multiplication below stays below INT_MAX. */
size_t
base64_decode_maxsize(size_t srclen)
{
  tor_assert(srclen < INT_MAX / 3);

  /* Every 4 input characters carry at most 3 bytes of data. */
  return CEIL_DIV(srclen * 3, 4);
}
193
/** Internal table mapping 6 bit values to the Base64 alphabet: indices 0..25
 * are 'A'..'Z', 26..51 are 'a'..'z', 52..61 are '0'..'9', 62 is '+' and 63
 * is '/'. */
static const char base64_encode_table[64] = {
  /*  0..12 */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  /* 13..25 */
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  /* 26..38 */
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  /* 39..51 */
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  /* 52..61 */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  /* 62..63 */
  '+', '/'
};
205
206/** Base64 encode <b>srclen</b> bytes of data from <b>src</b>. Write
207 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
208 * bytes. Return the number of bytes written on success; -1 if
209 * destlen is too short, or other failure.
210 *
211 * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
212 * output in multiline format (64 character, `\n' terminated lines).
213 */
214int
215base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
216 int flags)
217{
218 const unsigned char *usrc = (unsigned char *)src;
219 const unsigned char *eous = usrc + srclen;
220 char *d = dest;
221 uint32_t n = 0;
222 size_t linelen = 0;
223 size_t enclen;
224 int n_idx = 0;
225
226 if (!src || !dest)
227 return -1;
228
229 /* Ensure that there is sufficient space, including the NUL. */
230 enclen = base64_encode_size(srclen, flags);
231 if (destlen < enclen + 1)
232 return -1;
233 if (destlen > SIZE_T_CEILING)
234 return -1;
235 if (enclen > INT_MAX)
236 return -1;
237
238 /* Make sure we leave no uninitialized data in the destination buffer. */
239 memset(dest, 0, destlen);
240
241 /* XXX/Yawning: If this ends up being too slow, this can be sped up
242 * by separating the multiline format case and the normal case, and
243 * processing 48 bytes of input at a time when newlines are desired.
244 */
245#define ENCODE_CHAR(ch) \
246 STMT_BEGIN \
247 *d++ = ch; \
248 if (flags & BASE64_ENCODE_MULTILINE) { \
249 if (++linelen % BASE64_OPENSSL_LINELEN == 0) { \
250 linelen = 0; \
251 *d++ = '\n'; \
252 } \
253 } \
254 STMT_END
255
256#define ENCODE_N(idx) \
257 ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
258
259#define ENCODE_PAD() ENCODE_CHAR('=')
260
261 /* Iterate over all the bytes in src. Each one will add 8 bits to the
262 * value we're encoding. Accumulate bits in <b>n</b>, and whenever we
263 * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
264 */
265 for ( ; usrc < eous; ++usrc) {
266 n = (n << 8) | *usrc;
267 if ((++n_idx) == 3) {
268 ENCODE_N(0);
269 ENCODE_N(1);
270 ENCODE_N(2);
271 ENCODE_N(3);
272 n_idx = 0;
273 n = 0;
274 }
275 }
276 switch (n_idx) {
277 case 0:
278 /* 0 leftover bits, no padding to add. */
279 break;
280 case 1:
281 /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
282 * by 2 padding characters.
283 */
284 n <<= 4;
285 ENCODE_N(2);
286 ENCODE_N(3);
287 ENCODE_PAD();
288 ENCODE_PAD();
289 break;
290 case 2:
291 /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
292 * by 1 padding character.
293 */
294 n <<= 2;
295 ENCODE_N(1);
296 ENCODE_N(2);
297 ENCODE_N(3);
298 ENCODE_PAD();
299 break;
300 // LCOV_EXCL_START -- we can't reach this point, because we enforce
301 // 0 <= ncov_idx < 3 in the loop above.
302 default:
303 /* Something went catastrophically wrong. */
305 return -1;
306 // LCOV_EXCL_STOP
307 }
308
309#undef ENCODE_N
310#undef ENCODE_PAD
311#undef ENCODE_CHAR
312
313 /* Multiline output always includes at least one newline. */
314 if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
315 *d++ = '\n';
316
317 tor_assert(d - dest == (ptrdiff_t)enclen);
318
319 *d++ = '\0'; /* NUL terminate the output. */
320
321 return (int) enclen;
322}
323
/** As base64_encode with no flags, but strip every '=' padding character
 * (and any newline) from the output afterwards.
 *
 * The data is first encoded with padding, so <b>dest</b> must be large
 * enough for base64_encode() to succeed, including room for the padding that
 * is subsequently removed.
 *
 * Return the length of the stripped, NUL-terminated output, or the
 * non-positive value returned by base64_encode(). */
int
base64_encode_nopad(char *dest, size_t destlen,
                    const uint8_t *src, size_t srclen)
{
  const char *in;
  char *out;
  int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);

  if (n <= 0)
    return n;
  tor_assert((size_t)n < destlen && dest[n] == 0);

  /* Compact the string in place; out never runs ahead of in. */
  out = dest;
  for (in = dest; *in; ++in) {
    if (*in != '=' && *in != '\n')
      *out++ = *in;
  }
  *out = 0;

  tor_assert(out - dest <= INT_MAX);

  return (int)(out - dest);
}

#undef BASE64_OPENSSL_LINELEN
/** @{ */
/** Special values used for the base64_decode_table */
#define X 255
#define SP 64
#define PAD 65
/** @} */
/** Internal table mapping byte values to what they represent in base64.
 * Numbers 0..63 are 6-bit integers.  SPs are spaces, and should be
 * skipped.  Xs are invalid and must not appear in base64. PAD indicates
 * end-of-string.  Each row below covers 16 consecutive byte values. */
static const uint8_t base64_decode_table[256] = {
  /* 0x00: TAB, LF, VT and CR are treated as space. */
  X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
  /* 0x10 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  /* 0x20: ' ' is space; '+' is 62; '/' is 63. */
  SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
  /* 0x30: '0'..'9' are 52..61; '=' is padding. */
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
  /* 0x40: 'A'..'O' are 0..14. */
  X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  /* 0x50: 'P'..'Z' are 15..25. */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
  /* 0x60: 'a'..'o' are 26..40. */
  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70: 'p'..'z' are 41..51. */
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
  /* 0x80..0xFF: never valid. */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
};
382
383/** Base64 decode <b>srclen</b> bytes of data from <b>src</b>. Write
384 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
385 * bytes. Return the number of bytes written on success; -1 if
386 * destlen is too short, or other failure.
387 *
388 * NOTE 1: destlen is checked conservatively, as though srclen contained no
389 * spaces or padding.
390 *
391 * NOTE 2: This implementation does not check for the correct number of
392 * padding "=" characters at the end of the string, and does not check
393 * for internal padding characters.
394 */
395int
396base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
397{
398 const char *eos = src+srclen;
399 uint32_t n=0;
400 int n_idx=0;
401 size_t di = 0;
402
403 if (destlen > INT_MAX)
404 return -1;
405
406 /* Make sure we leave no uninitialized data in the destination buffer. */
407 memset(dest, 0, destlen);
408
409 /* Iterate over all the bytes in src. Each one will add 0 or 6 bits to the
410 * value we're decoding. Accumulate bits in <b>n</b>, and whenever we have
411 * 24 bits, batch them into 3 bytes and flush those bytes to dest.
412 */
413 for ( ; src < eos; ++src) {
414 unsigned char c = (unsigned char) *src;
415 uint8_t v = base64_decode_table[c];
416 switch (v) {
417 case X:
418 /* This character isn't allowed in base64. */
419 return -1;
420 case SP:
421 /* This character is whitespace, and has no effect. */
422 continue;
423 case PAD:
424 /* We've hit an = character: the data is over. */
425 goto end_of_loop;
426 default:
427 /* We have an actual 6-bit value. Append it to the bits in n. */
428 n = (n<<6) | v;
429 if ((++n_idx) == 4) {
430 /* We've accumulated 24 bits in n. Flush them. */
431 if (destlen < 3 || di > destlen - 3)
432 return -1;
433 dest[di++] = (n>>16);
434 dest[di++] = (n>>8) & 0xff;
435 dest[di++] = (n) & 0xff;
436 n_idx = 0;
437 n = 0;
438 }
439 }
440 }
441 end_of_loop:
442 /* If we have leftover bits, we need to cope. */
443 switch (n_idx) {
444 case 0:
445 default:
446 /* No leftover bits. We win. */
447 break;
448 case 1:
449 /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
450 return -1;
451 case 2:
452 /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
453 if (destlen < 1 || di > destlen - 1)
454 return -1;
455 dest[di++] = n >> 4;
456 break;
457 case 3:
458 /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
459 if (destlen < 2 || di > destlen - 2)
460 return -1;
461 dest[di++] = n >> 10;
462 dest[di++] = n >> 2;
463 }
464
465 tor_assert(di <= destlen);
466
467 return (int)di;
468}
469#undef X
470#undef SP
471#undef PAD
472
473/** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
474 * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
475 * <b>dest</b>.
476 */
477void
478base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
479{
480 const char *end;
481 char *cp;
482
483 tor_assert(srclen < SIZE_T_CEILING / 2 - 1);
484 tor_assert(destlen >= BASE16_BUFSIZE(srclen));
485 tor_assert(destlen < SIZE_T_CEILING);
486
487 /* Make sure we leave no uninitialized data in the destination buffer. */
488 memset(dest, 0, destlen);
489
490 cp = dest;
491 end = src+srclen;
492 while (src<end) {
493 *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
494 *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
495 ++src;
496 }
497 *cp = '\0';
498}
499
/** Decode the <b>srclen</b>-character hexadecimal string at <b>src</b> into
 * the <b>destlen</b>-byte buffer at <b>dest</b>.
 *
 * Return the number of bytes decoded on success.  Return -1 if
 * <b>srclen</b> is odd, if <b>destlen</b> is greater than INT_MAX or less
 * than half of <b>srclen</b>, or if a character is rejected by
 * hex_decode_digit().  The whole destination buffer is zeroed before
 * decoding; on a bad digit, bytes decoded before it remain in <b>dest</b>. */
int
base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
{
  char *dest_orig = dest;
  const char *end;

  if ((srclen % 2) != 0)
    return -1;
  if (destlen < srclen/2 || destlen > INT_MAX)
    return -1;

  /* Make sure we leave no uninitialized data in the destination buffer. */
  memset(dest, 0, destlen);

  /* Consume two hex digits per output byte. */
  for (end = src + srclen; src < end; src += 2, ++dest) {
    const int hi = hex_decode_digit(src[0]);
    const int lo = hex_decode_digit(src[1]);
    if (hi < 0 || lo < 0)
      return -1;
    *(uint8_t*)dest = (hi << 4) | lo;
  }

  tor_assert((dest-dest_orig) <= (ptrdiff_t) destlen);

  return (int) (dest-dest_orig);
}
int base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:90
const char * hex_str(const char *from, size_t fromlen)
Definition: binascii.c:34
#define X
Definition: binascii.c:356
int base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:396
int base64_encode(char *dest, size_t destlen, const char *src, size_t srclen, int flags)
Definition: binascii.c:215
int base64_encode_nopad(char *dest, size_t destlen, const uint8_t *src, size_t srclen)
Definition: binascii.c:329
int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:506
static const uint8_t base64_decode_table[256]
Definition: binascii.c:364
size_t base64_decode_maxsize(size_t srclen)
Definition: binascii.c:187
size_t base64_encode_size(size_t srclen, int flags)
Definition: binascii.c:166
static const char base64_encode_table[64]
Definition: binascii.c:195
void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:478
void base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:60
Header for binascii.c.
#define BASE64_LEN(n)
Definition: binascii.h:28
#define BASE32_CHARS
Definition: binascii.h:53
Locale-independent character-type inspection (header)
static int hex_decode_digit(char c)
Definition: compat_ctype.h:43
Headers for log.c.
#define LD_GENERAL
Definition: log.h:62
Headers for util_malloc.c.
#define tor_free(p)
Definition: malloc.h:56
Header for muldiv.c.
Integer definitions used throughout Tor.
#define SIZE_T_CEILING
Definition: torint.h:126
Macros to manage assertions, fatal and non-fatal.
#define tor_assert(expr)
Definition: util_bug.h:103
#define tor_fragile_assert()
Definition: util_bug.h:278