Tor 0.4.9.0-alpha-dev
parsecommon.c
Go to the documentation of this file.
1/* Copyright (c) 2016-2021, The Tor Project, Inc. */
2/* See LICENSE for licensing information */
3
4/**
5 * \file parsecommon.c
6 * \brief Common code to parse and validate various type of descriptors.
7 **/
8
10#include "lib/log/log.h"
11#include "lib/log/util_bug.h"
15#include "lib/string/printf.h"
16#include "lib/memarea/memarea.h"
18#include "lib/ctime/di_ops.h"
19
20#include <string.h>
21
/** Inclusive bounds of the directory_keyword range that this file treats as
 * annotations: a token whose type lies between them is an annotation. */
#define MIN_ANNOTATION A_PURPOSE
#define MAX_ANNOTATION A_UNKNOWN_

/** Allocation shorthands. Each expands to a memarea call on a variable named
 * <b>area</b>, which must be in scope wherever the macro is used. */
#define ALLOC_ZERO(sz) memarea_alloc_zero(area, (sz))
#define ALLOC(sz) memarea_alloc(area, (sz))
#define STRDUP(str) memarea_strdup(area, (str))
#define STRNDUP(str,n) memarea_strndup(area, (str), (n))

/** Abandon the token being built and report <b>msg</b> instead: clear the
 * current <b>tok</b> if there is one, replace it with a zeroed token of type
 * ERR_ whose error field is a copy of <b>msg</b>, and jump to the
 * done_tokenizing label. The enclosing function must provide <b>tok</b>,
 * <b>area</b> and that label. */
#define RET_ERR(msg)                                  \
  STMT_BEGIN                                          \
    if (tok) token_clear(tok);                        \
    tok = ALLOC_ZERO(sizeof(directory_token_t));      \
    tok->tp = ERR_;                                   \
    tok->error = STRDUP((msg));                       \
    goto done_tokenizing;                             \
  STMT_END
38
39/** Free all resources allocated for <b>tok</b> */
40void
42{
43 if (tok->key)
44 crypto_pk_free(tok->key);
45}
46
47/** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
48 * them to <b>out</b>. Parse according to the token rules in <b>table</b>.
49 * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the
50 * entire string.
51 */
52int
54 const char *start, const char *end, smartlist_t *out,
55 const token_rule_t *table, int flags)
56{
57 const char **s;
58 directory_token_t *tok = NULL;
59 int counts[NIL_];
60 int i;
61 int first_nonannotation;
62 int prev_len = smartlist_len(out);
63 tor_assert(area);
64
65 s = &start;
66 if (!end) {
67 end = start+strlen(start);
68 } else {
69 /* it's only meaningful to check for nuls if we got an end-of-string ptr */
70 if (memchr(start, '\0', end-start)) {
71 log_warn(LD_DIR, "parse error: internal NUL character.");
72 return -1;
73 }
74 }
75 for (i = 0; i < NIL_; ++i)
76 counts[i] = 0;
77
78 SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
79
80 while (*s < end && (!tok || tok->tp != EOF_)) {
81 tok = get_next_token(area, s, end, table);
82 if (tok->tp == ERR_) {
83 log_warn(LD_DIR, "parse error: %s", tok->error);
84 token_clear(tok);
85 return -1;
86 }
87 ++counts[tok->tp];
88 smartlist_add(out, tok);
89 *s = eat_whitespace_eos(*s, end);
90 }
91
92 if (flags & TS_NOCHECK)
93 return 0;
94
95 if ((flags & TS_ANNOTATIONS_OK)) {
96 first_nonannotation = -1;
97 for (i = 0; i < smartlist_len(out); ++i) {
98 tok = smartlist_get(out, i);
99 if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
100 first_nonannotation = i;
101 break;
102 }
103 }
104 if (first_nonannotation < 0) {
105 log_warn(LD_DIR, "parse error: item contains only annotations");
106 return -1;
107 }
108 for (i=first_nonannotation; i < smartlist_len(out); ++i) {
109 tok = smartlist_get(out, i);
110 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
111 log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
112 return -1;
113 }
114 }
115 if ((flags & TS_NO_NEW_ANNOTATIONS)) {
116 if (first_nonannotation != prev_len) {
117 log_warn(LD_DIR, "parse error: Unexpected annotations.");
118 return -1;
119 }
120 }
121 } else {
122 for (i=0; i < smartlist_len(out); ++i) {
123 tok = smartlist_get(out, i);
124 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
125 log_warn(LD_DIR, "parse error: no annotations allowed.");
126 return -1;
127 }
128 }
129 first_nonannotation = 0;
130 }
131 for (i = 0; table[i].t; ++i) {
132 if (counts[table[i].v] < table[i].min_cnt) {
133 log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
134 return -1;
135 }
136 if (counts[table[i].v] > table[i].max_cnt) {
137 log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
138 return -1;
139 }
140 if (table[i].pos & AT_START) {
141 if (smartlist_len(out) < 1 ||
142 (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
143 log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
144 return -1;
145 }
146 }
147 if (table[i].pos & AT_END) {
148 if (smartlist_len(out) < 1 ||
149 (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
150 log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
151 return -1;
152 }
153 }
154 }
155 return 0;
156}
157
158/** Helper: parse space-separated arguments from the string <b>s</b> ending at
159 * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the
160 * number of parsed elements into the n_args field of <b>tok</b>. Allocate
161 * all storage in <b>area</b>. Return the number of arguments parsed, or
162 * return -1 if there was an insanely high number of arguments. */
163static inline int
165 const char *s, const char *eol)
166{
167/** Largest number of arguments we'll accept to any token, ever. */
168#define MAX_ARGS 512
169 char *mem = memarea_strndup(area, s, eol-s);
170 char *cp = mem;
171 int j = 0;
172 char *args[MAX_ARGS];
173 while (*cp) {
174 if (j == MAX_ARGS)
175 return -1;
176 args[j++] = cp;
177 cp = (char*)find_whitespace(cp);
178 if (!cp || !*cp)
179 break; /* End of the line. */
180 *cp++ = '\0';
181 cp = (char*)eat_whitespace(cp);
182 }
183 tok->n_args = j;
184 tok->args = memarea_memdup(area, args, j*sizeof(char*));
185 return j;
186#undef MAX_ARGS
187}
188
189/** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
190 * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>.
191 * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
192 * conform to the syntax we wanted.
193 **/
194static inline directory_token_t *
195token_check_object(memarea_t *area, const char *kwd,
196 directory_token_t *tok, obj_syntax o_syn)
197{
198 char ebuf[128];
199 switch (o_syn) {
200 case NO_OBJ:
201 /* No object is allowed for this token. */
202 if (tok->object_body) {
203 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
204 RET_ERR(ebuf);
205 }
206 if (tok->key) {
207 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
208 RET_ERR(ebuf);
209 }
210 break;
211 case NEED_OBJ:
212 /* There must be a (non-key) object. */
213 if (!tok->object_body) {
214 tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
215 RET_ERR(ebuf);
216 }
217 break;
218 case OPT_KEY_1024:
219 /* If there is anything, it must be a 1024-bit RSA key. */
220 if (tok->object_body && !tok->key) {
221 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
222 RET_ERR(ebuf);
223 }
224 if (!tok->key) {
225 break;
226 }
227 FALLTHROUGH;
228 case NEED_KEY_1024: /* There must be a 1024-bit public key. */
229 if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
230 tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
231 kwd, crypto_pk_num_bits(tok->key));
232 RET_ERR(ebuf);
233 }
234 FALLTHROUGH;
235 case NEED_KEY: /* There must be some kind of key. */
236 if (!tok->key) {
237 tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
238 RET_ERR(ebuf);
239 }
240
241 if (crypto_pk_key_is_private(tok->key)) {
242 tor_snprintf(ebuf, sizeof(ebuf),
243 "Private key given for %s, which wants a public key", kwd);
244 RET_ERR(ebuf);
245 }
246 break;
247 case OBJ_OK:
248 /* Anything goes with this token. */
249 break;
250 }
251
252 done_tokenizing:
253 return tok;
254}
255
/** Compare the <b>memlen</b>-byte chunk of memory at <b>mem</b> with the
 * NUL-terminated string <b>token</b>. Return true iff the chunk has the same
 * length as <b>token</b> (not counting its terminating NUL) and the same
 * contents. */
static bool
mem_eq_token(const void *mem, size_t memlen, const char *token)
{
  const size_t token_len = strlen(token);

  /* A length mismatch settles the question without looking at the bytes. */
  if (memlen != token_len)
    return false;

  return fast_memeq(mem, token, token_len);
}
265
266/** Helper function: read the next token from *s, advance *s to the end of the
267 * token, and return the parsed token. Parse *<b>s</b> according to the list
268 * of tokens in <b>table</b>.
269 */
272 const char **s, const char *eos, const token_rule_t *table)
273{
274 /** Reject any object at least this big; it is probably an overflow, an
275 * attack, a bug, or some other nonsense. */
276#define MAX_UNPARSED_OBJECT_SIZE (128*1024)
277 /** Reject any line at least this big; it is probably an overflow, an
278 * attack, a bug, or some other nonsense. */
279#define MAX_LINE_LENGTH (128*1024)
280
281 const char *next, *eol;
282 size_t obname_len;
283 int i;
285 obj_syntax o_syn = NO_OBJ;
286 char ebuf[128];
287 const char *kwd = "";
288
289 tor_assert(area);
290 tok = ALLOC_ZERO(sizeof(directory_token_t));
291 tok->tp = ERR_;
292
293 /* Set *s to first token, eol to end-of-line, next to after first token */
294 *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
295 tor_assert(eos >= *s);
296 eol = memchr(*s, '\n', eos-*s);
297 if (!eol)
298 eol = eos;
299 if (eol - *s > MAX_LINE_LENGTH) {
300 RET_ERR("Line far too long");
301 }
302
303 next = find_whitespace_eos(*s, eol);
304
305 if (mem_eq_token(*s, next-*s, "opt")) {
306 /* Skip past an "opt" at the start of the line. */
307 *s = eat_whitespace_eos_no_nl(next, eol);
308 next = find_whitespace_eos(*s, eol);
309 } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */
310 RET_ERR("Unexpected EOF");
311 }
312
313 /* Search the table for the appropriate entry. (I tried a binary search
314 * instead, but it wasn't any faster.) */
315 for (i = 0; table[i].t ; ++i) {
316 if (mem_eq_token(*s, next-*s, table[i].t)) {
317 /* We've found the keyword. */
318 kwd = table[i].t;
319 tok->tp = table[i].v;
320 o_syn = table[i].os;
321 *s = eat_whitespace_eos_no_nl(next, eol);
322 /* We go ahead whether there are arguments or not, so that tok->args is
323 * always set if we want arguments. */
324 if (table[i].concat_args) {
325 /* The keyword takes the line as a single argument */
326 tok->args = ALLOC(sizeof(char*));
327 tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
328 tok->n_args = 1;
329 } else {
330 /* This keyword takes multiple arguments. */
331 if (get_token_arguments(area, tok, *s, eol)<0) {
332 tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
333 RET_ERR(ebuf);
334 }
335 *s = eol;
336 }
337 if (tok->n_args < table[i].min_args) {
338 tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
339 RET_ERR(ebuf);
340 } else if (tok->n_args > table[i].max_args) {
341 tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
342 RET_ERR(ebuf);
343 }
344 break;
345 }
346 }
347
348 if (tok->tp == ERR_) {
349 /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
350 if (*s < eol && **s == '@')
351 tok->tp = A_UNKNOWN_;
352 else
353 tok->tp = K_OPT;
354 tok->args = ALLOC(sizeof(char*));
355 tok->args[0] = STRNDUP(*s, eol-*s);
356 tok->n_args = 1;
357 o_syn = OBJ_OK;
358 }
359
360 /* Check whether there's an object present */
361 *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */
362 tor_assert(eos >= *s);
363 eol = memchr(*s, '\n', eos-*s);
364 if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
365 goto check_object;
366
367 if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
368 !mem_eq_token(eol-5, 5, "-----") || /* nuls or invalid endings */
369 (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */
370 RET_ERR("Malformed object: bad begin line");
371 }
372 tok->object_type = STRNDUP(*s+11, eol-*s-16);
373 obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
374 *s = eol+1; /* Set *s to possible start of object data (could be eos) */
375
376 /* Go to the end of the object */
377 next = tor_memstr(*s, eos-*s, "-----END ");
378 if (!next) {
379 RET_ERR("Malformed object: missing object end line");
380 }
381 tor_assert(eos >= next);
382 eol = memchr(next, '\n', eos-next);
383 if (!eol) /* end-of-line marker, or eos if there's no '\n' */
384 eol = eos;
385 /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
386 if ((size_t)(eol-next) != 9+obname_len+5 ||
387 !mem_eq_token(next+9, obname_len, tok->object_type) ||
388 !mem_eq_token(eol-5, 5, "-----")) {
389 tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
390 tok->object_type);
391 ebuf[sizeof(ebuf)-1] = '\0';
392 RET_ERR(ebuf);
393 }
394 if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
395 RET_ERR("Couldn't parse object: missing footer or object much too big.");
396
397 {
398 int r;
399 size_t maxsize = base64_decode_maxsize(next-*s);
400 tok->object_body = ALLOC(maxsize);
401 r = base64_decode(tok->object_body, maxsize, *s, next-*s);
402 if (r<0)
403 RET_ERR("Malformed object: bad base64-encoded data");
404 tok->object_size = r;
405 }
406
407 if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
408 if (o_syn != OPT_KEY_1024 && o_syn != NEED_KEY &&
409 o_syn != NEED_KEY_1024 && o_syn != OBJ_OK) {
410 RET_ERR("Unexpected public key.");
411 }
413 if (! tok->key)
414 RET_ERR("Couldn't parse public key.");
415 }
416 *s = eol;
417
418 check_object:
419 tok = token_check_object(area, kwd, tok, o_syn);
420
421 done_tokenizing:
422 return tok;
423
424#undef RET_ERR
425#undef ALLOC
426#undef ALLOC_ZERO
427#undef STRDUP
428#undef STRNDUP
429}
430
431/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
432 * with an assert if no such keyword is found.
433 */
436 const char *keyword_as_string)
437{
438 directory_token_t *tok = find_opt_by_keyword(s, keyword);
439 if (PREDICT_UNLIKELY(!tok)) {
440 log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
441 "been validated. Internal error.", keyword_as_string, (int)keyword);
442 tor_assert(tok);
443 }
444 return tok;
445}
446
447/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
448 * NULL if no such keyword is found.
449 */
452{
453 SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
454 return NULL;
455}
456
457/** If there are any directory_token_t entries in <b>s</b> whose keyword is
458 * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
459 * in the same order in which they occur in <b>s</b>. Otherwise return
460 * NULL. */
463{
464 smartlist_t *out = NULL;
466 if (t->tp == k) {
467 if (!out)
468 out = smartlist_new();
469 smartlist_add(out, t);
470 });
471 return out;
472}
int base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:396
size_t base64_decode_maxsize(size_t srclen)
Definition: binascii.c:187
Header for binascii.c.
static conn_counts_t counts
Definition: connstats.c:72
Headers for crypto_rsa.c.
#define PK_BYTES
Definition: crypto_rsa.h:24
crypto_pk_t * crypto_pk_asn1_decode(const char *str, size_t len)
int crypto_pk_num_bits(crypto_pk_t *env)
int crypto_pk_key_is_private(const crypto_pk_t *key)
Headers for di_ops.c.
#define fast_memeq(a, b, c)
Definition: di_ops.h:35
Headers for log.c.
#define LD_BUG
Definition: log.h:86
#define LD_DIR
Definition: log.h:88
char * memarea_strndup(memarea_t *area, const char *s, size_t n)
Definition: memarea.c:273
void * memarea_memdup(memarea_t *area, const void *s, size_t n)
Definition: memarea.c:257
Header for memarea.c.
void token_clear(directory_token_t *tok)
Definition: parsecommon.c:41
static directory_token_t * token_check_object(memarea_t *area, const char *kwd, directory_token_t *tok, obj_syntax o_syn)
Definition: parsecommon.c:195
directory_token_t * get_next_token(memarea_t *area, const char **s, const char *eos, const token_rule_t *table)
Definition: parsecommon.c:271
smartlist_t * find_all_by_keyword(const smartlist_t *s, directory_keyword k)
Definition: parsecommon.c:462
int tokenize_string(memarea_t *area, const char *start, const char *end, smartlist_t *out, const token_rule_t *table, int flags)
Definition: parsecommon.c:53
directory_token_t * find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword)
Definition: parsecommon.c:451
static int get_token_arguments(memarea_t *area, directory_token_t *tok, const char *s, const char *eol)
Definition: parsecommon.c:164
directory_token_t * find_by_keyword_(smartlist_t *s, directory_keyword keyword, const char *keyword_as_string)
Definition: parsecommon.c:435
static bool mem_eq_token(const void *mem, size_t memlen, const char *token)
Definition: parsecommon.c:260
Header file for parsecommon.c.
obj_syntax
Definition: parsecommon.h:220
@ NEED_KEY
Definition: parsecommon.h:225
@ OPT_KEY_1024
Definition: parsecommon.h:223
@ OBJ_OK
Definition: parsecommon.h:226
@ NO_OBJ
Definition: parsecommon.h:221
@ NEED_OBJ
Definition: parsecommon.h:222
@ NEED_KEY_1024
Definition: parsecommon.h:224
directory_keyword
Definition: parsecommon.h:23
int tor_snprintf(char *str, size_t size, const char *format,...)
Definition: printf.c:27
Header for printf.c.
Header for smartlist.c.
void smartlist_add(smartlist_t *sl, void *element)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
directory_keyword tp
Definition: parsecommon.h:204
struct crypto_pk_t * key
Definition: parsecommon.h:212
obj_syntax os
Definition: parsecommon.h:290
directory_keyword v
Definition: parsecommon.h:281
const char * t
Definition: parsecommon.h:279
Macros to manage assertions, fatal and non-fatal.
#define tor_assert(expr)
Definition: util_bug.h:103
const char * find_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:377
int strcmpstart(const char *s1, const char *s2)
Definition: util_string.c:217
const char * find_whitespace(const char *s)
Definition: util_string.c:355
const char * eat_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:306
const char * eat_whitespace(const char *s)
Definition: util_string.c:279
const char * eat_whitespace_eos_no_nl(const char *s, const char *eos)
Definition: util_string.c:344
Header for util_string.c.