Tor 0.4.9.0-alpha-dev
parsecommon.c
Go to the documentation of this file.
1/* Copyright (c) 2016-2021, The Tor Project, Inc. */
2/* See LICENSE for licensing information */
3
4/**
5 * \file parsecommon.c
6 * \brief Common code to parse and validate various types of descriptors.
7 **/
8
10#include "lib/log/log.h"
11#include "lib/log/util_bug.h"
15#include "lib/string/printf.h"
16#include "lib/memarea/memarea.h"
18#include "lib/ctime/di_ops.h"
19
20#include <string.h>
21
/* Inclusive bounds of the directory_keyword values that tokenize_string()
 * treats as annotations. */
#define MIN_ANNOTATION A_PURPOSE
#define MAX_ANNOTATION A_UNKNOWN_

/* Allocation shorthands.  Each one expects a variable named <b>area</b> to
 * be in scope at the point of use. */
#define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz)
#define ALLOC(sz) memarea_alloc(area,sz)
#define STRDUP(str) memarea_strdup(area,str)
#define STRNDUP(str,n) memarea_strndup(area,(str),(n))

/* Abandon the token being built and report <b>msg</b> instead: clear the
 * current <b>tok</b> (if any), replace it with a fresh zeroed ERR_ token
 * whose error field is a copy of <b>msg</b>, and jump to the
 * <b>done_tokenizing</b> label.  The enclosing function must provide
 * <b>area</b>, <b>tok</b> and that label. */
#define RET_ERR(msg)                                               \
  STMT_BEGIN                                                       \
    if (tok) token_clear(tok);                                     \
    tok = ALLOC_ZERO(sizeof(directory_token_t));                   \
    tok->tp = ERR_;                                                \
    tok->error = STRDUP(msg);                                      \
    goto done_tokenizing;                                          \
  STMT_END
38
39/** Free all resources allocated for <b>tok</b> */
40void
42{
43 if (tok->key)
44 crypto_pk_free(tok->key);
45}
46
47/** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
48 * them to <b>out</b>. Parse according to the token rules in <b>table</b>.
49 * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the
50 * entire string.
51 */
52int
54 const char *start, const char *end, smartlist_t *out,
55 const token_rule_t *table, int flags)
56{
57 const char **s;
58 directory_token_t *tok = NULL;
59 int counts[NIL_];
60 int i;
61 int first_nonannotation;
62 int prev_len = smartlist_len(out);
63 tor_assert(area);
64
65 s = &start;
66 if (!end) {
67 end = start+strlen(start);
68 } else {
69 /* it's only meaningful to check for nuls if we got an end-of-string ptr */
70 if (memchr(start, '\0', end-start)) {
71 log_warn(LD_DIR, "parse error: internal NUL character.");
72 return -1;
73 }
74 }
75 for (i = 0; i < NIL_; ++i)
76 counts[i] = 0;
77
78 SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
79
80 while (*s < end && (!tok || tok->tp != EOF_)) {
81 tok = get_next_token(area, s, end, table);
82 if (tok->tp == ERR_) {
83 log_warn(LD_DIR, "parse error: %s", tok->error);
84 token_clear(tok);
85 return -1;
86 }
87 ++counts[tok->tp];
88 smartlist_add(out, tok);
89 *s = eat_whitespace_eos(*s, end);
90 }
91
92 if (flags & TS_NOCHECK)
93 return 0;
94
95 if ((flags & TS_ANNOTATIONS_OK)) {
96 first_nonannotation = -1;
97 for (i = 0; i < smartlist_len(out); ++i) {
98 tok = smartlist_get(out, i);
99 if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
100 first_nonannotation = i;
101 break;
102 }
103 }
104 if (first_nonannotation < 0) {
105 log_warn(LD_DIR, "parse error: item contains only annotations");
106 return -1;
107 }
108 for (i=first_nonannotation; i < smartlist_len(out); ++i) {
109 tok = smartlist_get(out, i);
110 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
111 log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
112 return -1;
113 }
114 }
115 if ((flags & TS_NO_NEW_ANNOTATIONS)) {
116 if (first_nonannotation != prev_len) {
117 log_warn(LD_DIR, "parse error: Unexpected annotations.");
118 return -1;
119 }
120 }
121 } else {
122 for (i=0; i < smartlist_len(out); ++i) {
123 tok = smartlist_get(out, i);
124 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
125 log_warn(LD_DIR, "parse error: no annotations allowed.");
126 return -1;
127 }
128 }
129 first_nonannotation = 0;
130 }
131 for (i = 0; table[i].t; ++i) {
132 if (counts[table[i].v] < table[i].min_cnt) {
133 log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
134 return -1;
135 }
136 if (counts[table[i].v] > table[i].max_cnt) {
137 log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
138 return -1;
139 }
140 if (table[i].pos & AT_START) {
141 if (smartlist_len(out) < 1 ||
142 (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
143 log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
144 return -1;
145 }
146 }
147 if (table[i].pos & AT_END) {
148 if (smartlist_len(out) < 1 ||
149 (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
150 log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
151 return -1;
152 }
153 }
154 }
155 return 0;
156}
157
158/** Helper: parse space-separated arguments from the string <b>s</b> ending at
159 * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the
160 * number of parsed elements into the n_args field of <b>tok</b>. Allocate
161 * all storage in <b>area</b>. Return the number of arguments parsed, or
162 * return -1 if there was an insanely high number of arguments. */
163static inline int
165 const char *s, const char *eol)
166{
167/** Largest number of arguments we'll accept to any token, ever. */
168#define MAX_ARGS 512
169 char *mem = memarea_strndup(area, s, eol-s);
170 char *cp = mem;
171 int j = 0;
172 char *args[MAX_ARGS];
173 while (*cp) {
174 if (j == MAX_ARGS)
175 return -1;
176 args[j++] = cp;
177 cp = (char*)find_whitespace(cp);
178 if (!cp || !*cp)
179 break; /* End of the line. */
180 *cp++ = '\0';
181 cp = (char*)eat_whitespace(cp);
182 }
183 tok->n_args = j;
184 tok->args = memarea_memdup(area, args, j*sizeof(char*));
185 return j;
186#undef MAX_ARGS
187}
188
189/** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
190 * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>.
191 * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
192 * conform to the syntax we wanted.
193 **/
194static inline directory_token_t *
195token_check_object(memarea_t *area, const char *kwd,
196 directory_token_t *tok, obj_syntax o_syn)
197{
198 char ebuf[128];
199 switch (o_syn) {
200 case NO_OBJ:
201 /* No object is allowed for this token. */
202 if (tok->object_body) {
203 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
204 RET_ERR(ebuf);
205 }
206 if (tok->key) {
207 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
208 RET_ERR(ebuf);
209 }
210 break;
211 case NEED_OBJ:
212 /* There must be a (non-key) object. */
213 if (!tok->object_body) {
214 tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
215 RET_ERR(ebuf);
216 }
217 break;
218 case NEED_KEY_1024: /* There must be a 1024-bit public key. */
219 if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
220 tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
221 kwd, crypto_pk_num_bits(tok->key));
222 RET_ERR(ebuf);
223 }
224 FALLTHROUGH;
225 case NEED_KEY: /* There must be some kind of key. */
226 if (!tok->key) {
227 tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
228 RET_ERR(ebuf);
229 }
230
231 if (crypto_pk_key_is_private(tok->key)) {
232 tor_snprintf(ebuf, sizeof(ebuf),
233 "Private key given for %s, which wants a public key", kwd);
234 RET_ERR(ebuf);
235 }
236 break;
237 case OBJ_OK:
238 /* Anything goes with this token. */
239 break;
240 }
241
242 done_tokenizing:
243 return tok;
244}
245
/** Return true iff the <b>memlen</b>-byte chunk of memory at
 * <b>mem</b> is the same length as <b>token</b>, and their
 * contents are equal.
 *
 * <b>token</b> must be NUL-terminated; <b>mem</b> need not be. */
static bool
mem_eq_token(const void *mem, size_t memlen, const char *token)
{
  size_t len = strlen(token);
  /* Compare lengths first so that we never read past memlen bytes. */
  return memlen == len && fast_memeq(mem, token, len);
}
255
256/** Helper function: read the next token from *s, advance *s to the end of the
257 * token, and return the parsed token. Parse *<b>s</b> according to the list
258 * of tokens in <b>table</b>.
259 */
262 const char **s, const char *eos, const token_rule_t *table)
263{
264 /** Reject any object at least this big; it is probably an overflow, an
265 * attack, a bug, or some other nonsense. */
266#define MAX_UNPARSED_OBJECT_SIZE (128*1024)
267 /** Reject any line at least this big; it is probably an overflow, an
268 * attack, a bug, or some other nonsense. */
269#define MAX_LINE_LENGTH (128*1024)
270
271 const char *next, *eol;
272 size_t obname_len;
273 int i;
275 obj_syntax o_syn = NO_OBJ;
276 char ebuf[128];
277 const char *kwd = "";
278
279 tor_assert(area);
280 tok = ALLOC_ZERO(sizeof(directory_token_t));
281 tok->tp = ERR_;
282
283 /* Set *s to first token, eol to end-of-line, next to after first token */
284 *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
285 tor_assert(eos >= *s);
286 eol = memchr(*s, '\n', eos-*s);
287 if (!eol)
288 eol = eos;
289 if (eol - *s > MAX_LINE_LENGTH) {
290 RET_ERR("Line far too long");
291 }
292
293 next = find_whitespace_eos(*s, eol);
294
295 if (mem_eq_token(*s, next-*s, "opt")) {
296 /* Skip past an "opt" at the start of the line. */
297 *s = eat_whitespace_eos_no_nl(next, eol);
298 next = find_whitespace_eos(*s, eol);
299 } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */
300 RET_ERR("Unexpected EOF");
301 }
302
303 /* Search the table for the appropriate entry. (I tried a binary search
304 * instead, but it wasn't any faster.) */
305 for (i = 0; table[i].t ; ++i) {
306 if (mem_eq_token(*s, next-*s, table[i].t)) {
307 /* We've found the keyword. */
308 kwd = table[i].t;
309 tok->tp = table[i].v;
310 o_syn = table[i].os;
311 *s = eat_whitespace_eos_no_nl(next, eol);
312 /* We go ahead whether there are arguments or not, so that tok->args is
313 * always set if we want arguments. */
314 if (table[i].concat_args) {
315 /* The keyword takes the line as a single argument */
316 tok->args = ALLOC(sizeof(char*));
317 tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
318 tok->n_args = 1;
319 } else {
320 /* This keyword takes multiple arguments. */
321 if (get_token_arguments(area, tok, *s, eol)<0) {
322 tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
323 RET_ERR(ebuf);
324 }
325 *s = eol;
326 }
327 if (tok->n_args < table[i].min_args) {
328 tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
329 RET_ERR(ebuf);
330 } else if (tok->n_args > table[i].max_args) {
331 tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
332 RET_ERR(ebuf);
333 }
334 break;
335 }
336 }
337
338 if (tok->tp == ERR_) {
339 /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
340 if (*s < eol && **s == '@')
341 tok->tp = A_UNKNOWN_;
342 else
343 tok->tp = K_OPT;
344 tok->args = ALLOC(sizeof(char*));
345 tok->args[0] = STRNDUP(*s, eol-*s);
346 tok->n_args = 1;
347 o_syn = OBJ_OK;
348 }
349
350 /* Check whether there's an object present */
351 *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */
352 tor_assert(eos >= *s);
353 eol = memchr(*s, '\n', eos-*s);
354 if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
355 goto check_object;
356
357 if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
358 !mem_eq_token(eol-5, 5, "-----") || /* nuls or invalid endings */
359 (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */
360 RET_ERR("Malformed object: bad begin line");
361 }
362 tok->object_type = STRNDUP(*s+11, eol-*s-16);
363 obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
364 *s = eol+1; /* Set *s to possible start of object data (could be eos) */
365
366 /* Go to the end of the object */
367 next = tor_memstr(*s, eos-*s, "-----END ");
368 if (!next) {
369 RET_ERR("Malformed object: missing object end line");
370 }
371 tor_assert(eos >= next);
372 eol = memchr(next, '\n', eos-next);
373 if (!eol) /* end-of-line marker, or eos if there's no '\n' */
374 eol = eos;
375 /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
376 if ((size_t)(eol-next) != 9+obname_len+5 ||
377 !mem_eq_token(next+9, obname_len, tok->object_type) ||
378 !mem_eq_token(eol-5, 5, "-----")) {
379 tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
380 tok->object_type);
381 ebuf[sizeof(ebuf)-1] = '\0';
382 RET_ERR(ebuf);
383 }
384 if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
385 RET_ERR("Couldn't parse object: missing footer or object much too big.");
386
387 {
388 int r;
389 size_t maxsize = base64_decode_maxsize(next-*s);
390 tok->object_body = ALLOC(maxsize);
391 r = base64_decode(tok->object_body, maxsize, *s, next-*s);
392 if (r<0)
393 RET_ERR("Malformed object: bad base64-encoded data");
394 tok->object_size = r;
395 }
396
397 if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
398 if (o_syn != NEED_KEY && o_syn != NEED_KEY_1024 && o_syn != OBJ_OK) {
399 RET_ERR("Unexpected public key.");
400 }
402 if (! tok->key)
403 RET_ERR("Couldn't parse public key.");
404 }
405 *s = eol;
406
407 check_object:
408 tok = token_check_object(area, kwd, tok, o_syn);
409
410 done_tokenizing:
411 return tok;
412
413#undef RET_ERR
414#undef ALLOC
415#undef ALLOC_ZERO
416#undef STRDUP
417#undef STRNDUP
418}
419
420/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
421 * with an assert if no such keyword is found.
422 */
425 const char *keyword_as_string)
426{
427 directory_token_t *tok = find_opt_by_keyword(s, keyword);
428 if (PREDICT_UNLIKELY(!tok)) {
429 log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
430 "been validated. Internal error.", keyword_as_string, (int)keyword);
431 tor_assert(tok);
432 }
433 return tok;
434}
435
436/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
437 * NULL if no such keyword is found.
438 */
441{
442 SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
443 return NULL;
444}
445
446/** If there are any directory_token_t entries in <b>s</b> whose keyword is
447 * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
448 * in the same order in which they occur in <b>s</b>. Otherwise return
449 * NULL. */
452{
453 smartlist_t *out = NULL;
455 if (t->tp == k) {
456 if (!out)
457 out = smartlist_new();
458 smartlist_add(out, t);
459 });
460 return out;
461}
int base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:396
size_t base64_decode_maxsize(size_t srclen)
Definition: binascii.c:187
Header for binascii.c.
static conn_counts_t counts
Definition: connstats.c:72
Headers for crypto_rsa.c.
#define PK_BYTES
Definition: crypto_rsa.h:24
crypto_pk_t * crypto_pk_asn1_decode(const char *str, size_t len)
int crypto_pk_num_bits(crypto_pk_t *env)
int crypto_pk_key_is_private(const crypto_pk_t *key)
Headers for di_ops.c.
#define fast_memeq(a, b, c)
Definition: di_ops.h:35
Headers for log.c.
#define LD_BUG
Definition: log.h:86
#define LD_DIR
Definition: log.h:88
char * memarea_strndup(memarea_t *area, const char *s, size_t n)
Definition: memarea.c:273
void * memarea_memdup(memarea_t *area, const void *s, size_t n)
Definition: memarea.c:257
Header for memarea.c.
void token_clear(directory_token_t *tok)
Definition: parsecommon.c:41
static directory_token_t * token_check_object(memarea_t *area, const char *kwd, directory_token_t *tok, obj_syntax o_syn)
Definition: parsecommon.c:195
directory_token_t * get_next_token(memarea_t *area, const char **s, const char *eos, const token_rule_t *table)
Definition: parsecommon.c:261
smartlist_t * find_all_by_keyword(const smartlist_t *s, directory_keyword k)
Definition: parsecommon.c:451
int tokenize_string(memarea_t *area, const char *start, const char *end, smartlist_t *out, const token_rule_t *table, int flags)
Definition: parsecommon.c:53
directory_token_t * find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword)
Definition: parsecommon.c:440
static int get_token_arguments(memarea_t *area, directory_token_t *tok, const char *s, const char *eol)
Definition: parsecommon.c:164
directory_token_t * find_by_keyword_(smartlist_t *s, directory_keyword keyword, const char *keyword_as_string)
Definition: parsecommon.c:424
static bool mem_eq_token(const void *mem, size_t memlen, const char *token)
Definition: parsecommon.c:250
Header file for parsecommon.c.
obj_syntax
Definition: parsecommon.h:220
@ NEED_KEY
Definition: parsecommon.h:224
@ OBJ_OK
Definition: parsecommon.h:225
@ NO_OBJ
Definition: parsecommon.h:221
@ NEED_OBJ
Definition: parsecommon.h:222
@ NEED_KEY_1024
Definition: parsecommon.h:223
directory_keyword
Definition: parsecommon.h:23
int tor_snprintf(char *str, size_t size, const char *format,...)
Definition: printf.c:27
Header for printf.c.
Header for smartlist.c.
void smartlist_add(smartlist_t *sl, void *element)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
directory_keyword tp
Definition: parsecommon.h:204
struct crypto_pk_t * key
Definition: parsecommon.h:212
obj_syntax os
Definition: parsecommon.h:289
directory_keyword v
Definition: parsecommon.h:280
const char * t
Definition: parsecommon.h:278
Macros to manage assertions, fatal and non-fatal.
#define tor_assert(expr)
Definition: util_bug.h:103
const char * find_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:377
int strcmpstart(const char *s1, const char *s2)
Definition: util_string.c:217
const char * find_whitespace(const char *s)
Definition: util_string.c:355
const char * eat_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:306
const char * eat_whitespace(const char *s)
Definition: util_string.c:279
const char * eat_whitespace_eos_no_nl(const char *s, const char *eos)
Definition: util_string.c:344
Header for util_string.c.