Tor 0.4.9.0-alpha-dev
unparseable.c
Go to the documentation of this file.
1/* Copyright (c) 2001 Matej Pfajfar.
2 * Copyright (c) 2001-2004, Roger Dingledine.
3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 * Copyright (c) 2007-2021, The Tor Project, Inc. */
5/* See LICENSE for licensing information */
6
7/**
8 * @file unparseable.c
9 * @brief Dump unparseable objects to disk.
10 **/
11
12#define UNPARSEABLE_PRIVATE
13
14#include "core/or/or.h"
15#include "app/config/config.h"
17#include "lib/sandbox/sandbox.h"
18
19#ifdef HAVE_SYS_STAT_H
20#include <sys/stat.h>
21#endif
22
23/* Dump mechanism for unparseable descriptors */
24
25/** List of dumped descriptors for FIFO cleanup purposes */
27/** Total size of dumped descriptors for FIFO cleanup */
29/** Directory to stash dumps in */
30static int have_dump_desc_dir = 0;
31static int problem_with_dump_desc_dir = 0;
32
33#define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs"
34#define DESC_DUMP_BASE_FILENAME "unparseable-desc"
35
36/** Find the dump directory and check if we'll be able to create it */
37void
39{
40 char *dump_desc_dir;
41
42 dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
43
44 /*
45 * We just check for it, don't create it at this point; we'll
46 * create it when we need it if it isn't already there.
47 */
48 if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) {
49 /* Error, log and flag it as having a problem */
50 log_notice(LD_DIR,
51 "Doesn't look like we'll be able to create descriptor dump "
52 "directory %s; dumps will be disabled.",
53 dump_desc_dir);
54 problem_with_dump_desc_dir = 1;
55 tor_free(dump_desc_dir);
56 return;
57 }
58
59 /* Check if it exists */
60 switch (file_status(dump_desc_dir)) {
61 case FN_DIR:
62 /* We already have a directory */
64 break;
65 case FN_NOENT:
66 /* Nothing, we'll need to create it later */
68 break;
69 case FN_ERROR:
70 /* Log and flag having a problem */
71 log_notice(LD_DIR,
72 "Couldn't check whether descriptor dump directory %s already"
73 " exists: %s",
74 dump_desc_dir, strerror(errno));
75 problem_with_dump_desc_dir = 1;
76 break;
77 case FN_FILE:
78 case FN_EMPTY:
79 default:
80 /* Something else was here! */
81 log_notice(LD_DIR,
82 "Descriptor dump directory %s already exists and isn't a "
83 "directory",
84 dump_desc_dir);
85 problem_with_dump_desc_dir = 1;
86 }
87
88 if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
90 }
91
92 tor_free(dump_desc_dir);
93}
94
95/** Create the dump directory if needed and possible */
96static void
98{
99 char *dump_desc_dir;
100
101 /* If the problem flag is set, skip it */
102 if (problem_with_dump_desc_dir) return;
103
104 /* Do we need it? */
105 if (!have_dump_desc_dir) {
106 dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
107
108 if (check_private_dir(dump_desc_dir, CPD_CREATE,
109 get_options()->User) < 0) {
110 log_notice(LD_DIR,
111 "Failed to create descriptor dump directory %s",
112 dump_desc_dir);
113 problem_with_dump_desc_dir = 1;
114 }
115
116 /* Okay, we created it */
118
119 tor_free(dump_desc_dir);
120 }
121}
122
123/** Dump desc FIFO/cleanup; take ownership of the given filename, add it to
124 * the FIFO, and clean up the oldest entries to the extent they exceed the
125 * configured cap. If any old entries with a matching hash existed, they
126 * just got overwritten right before this was called and we should adjust
127 * the total size counter without deleting them.
128 */
129static void
130dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256,
131 size_t len)
132{
133 dumped_desc_t *ent = NULL, *tmp;
134 uint64_t max_len;
135
136 tor_assert(filename != NULL);
137 tor_assert(digest_sha256 != NULL);
138
139 if (descs_dumped == NULL) {
140 /* We better have no length, then */
142 /* Make a smartlist */
144 }
145
146 /* Make a new entry to put this one in */
147 ent = tor_malloc_zero(sizeof(*ent));
148 ent->filename = filename;
149 ent->len = len;
150 ent->when = time(NULL);
151 memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN);
152
153 /* Do we need to do some cleanup? */
155 /* Iterate over the list until we've freed enough space */
156 while (len > max_len - len_descs_dumped &&
157 smartlist_len(descs_dumped) > 0) {
158 /* Get the oldest thing on the list */
159 tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0));
160
161 /*
162 * Check if it matches the filename we just added, so we don't delete
163 * something we just emitted if we get repeated identical descriptors.
164 */
165 if (strcmp(tmp->filename, filename) != 0) {
166 /* Delete it and adjust the length counter */
167 tor_unlink(tmp->filename);
168 tor_assert(len_descs_dumped >= tmp->len);
169 len_descs_dumped -= tmp->len;
170 log_info(LD_DIR,
171 "Deleting old unparseable descriptor dump %s due to "
172 "space limits",
173 tmp->filename);
174 } else {
175 /*
176 * Don't delete, but do adjust the counter since we will bump it
177 * later
178 */
179 tor_assert(len_descs_dumped >= tmp->len);
180 len_descs_dumped -= tmp->len;
181 log_info(LD_DIR,
182 "Replacing old descriptor dump %s with new identical one",
183 tmp->filename);
184 }
185
186 /* Free it and remove it from the list */
188 tor_free(tmp->filename);
189 tor_free(tmp);
190 }
191
192 /* Append our entry to the end of the list and bump the counter */
194 len_descs_dumped += len;
195}
196
197/** Check if we already have a descriptor for this hash and move it to the
198 * head of the queue if so. Return 1 if one existed and 0 otherwise.
199 */
200static int
201dump_desc_fifo_bump_hash(const uint8_t *digest_sha256)
202{
203 dumped_desc_t *match = NULL;
204
205 tor_assert(digest_sha256);
206
207 if (descs_dumped) {
208 /* Find a match if one exists */
209 SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
210 if (ent &&
211 tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) {
212 /*
213 * Save a pointer to the match and remove it from its current
214 * position.
215 */
216 match = ent;
218 break;
219 }
220 } SMARTLIST_FOREACH_END(ent);
221
222 if (match) {
223 /* Update the timestamp */
224 match->when = time(NULL);
225 /* Add it back at the end of the list */
227
228 /* Indicate we found one */
229 return 1;
230 }
231 }
232
233 return 0;
234}
235
236/** Clean up on exit; just memory, leave the dumps behind
237 */
238void
240{
241 if (descs_dumped) {
242 /* Free each descriptor */
243 SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
244 tor_assert(ent);
245 tor_free(ent->filename);
246 tor_free(ent);
247 } SMARTLIST_FOREACH_END(ent);
248 /* Free the list */
249 smartlist_free(descs_dumped);
250 descs_dumped = NULL;
252 }
253}
254
255/** Handle one file for dump_desc_populate_fifo_from_directory(); make sure
256 * the filename is sensibly formed and matches the file content, and either
257 * return a dumped_desc_t for it or remove the file and return NULL.
258 */
259MOCK_IMPL(STATIC dumped_desc_t *,
260dump_desc_populate_one_file, (const char *dirname, const char *f))
261{
262 dumped_desc_t *ent = NULL;
263 char *path = NULL, *desc = NULL;
264 const char *digest_str;
265 char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN];
266 /* Expected prefix before digest in filenames */
267 const char *f_pfx = DESC_DUMP_BASE_FILENAME ".";
268 /*
269 * Stat while reading; this is important in case the file
270 * contains a NUL character.
271 */
272 struct stat st;
273
274 /* Sanity-check args */
275 tor_assert(dirname != NULL);
276 tor_assert(f != NULL);
277
278 /* Form the full path */
279 tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f);
280
281 /* Check that f has the form DESC_DUMP_BASE_FILENAME.<digest256> */
282
283 if (!strcmpstart(f, f_pfx)) {
284 /* It matches the form, but is the digest parseable as such? */
285 digest_str = f + strlen(f_pfx);
286 if (base16_decode(digest, DIGEST256_LEN,
287 digest_str, strlen(digest_str)) != DIGEST256_LEN) {
288 /* We failed to decode it */
289 digest_str = NULL;
290 }
291 } else {
292 /* No match */
293 digest_str = NULL;
294 }
295
296 if (!digest_str) {
297 /* We couldn't get a sensible digest */
298 log_notice(LD_DIR,
299 "Removing unrecognized filename %s from unparseable "
300 "descriptors directory", f);
301 tor_unlink(path);
302 /* We're done */
303 goto done;
304 }
305
306 /*
307 * The filename has the form DESC_DUMP_BASE_FILENAME "." <digest256> and
308 * we've decoded the digest. Next, check that we can read it and the
309 * content matches this digest. We are relying on the fact that if the
310 * file contains a '\0', read_file_to_str() will allocate space for and
311 * read the entire file and return the correct size in st.
312 */
313 desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st);
314 if (!desc) {
315 /* We couldn't read it */
316 log_notice(LD_DIR,
317 "Failed to read %s from unparseable descriptors directory; "
318 "attempting to remove it.", f);
319 tor_unlink(path);
320 /* We're done */
321 goto done;
322 }
323
324#if SIZE_MAX > UINT64_MAX
325 if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) {
326 /* LCOV_EXCL_START
327 * Should be impossible since RFTS above should have failed to read the
328 * huge file into RAM. */
329 goto done;
330 /* LCOV_EXCL_STOP */
331 }
332#endif /* SIZE_MAX > UINT64_MAX */
333 if (BUG(st.st_size < 0)) {
334 /* LCOV_EXCL_START
335 * Should be impossible, since the OS isn't supposed to be b0rken. */
336 goto done;
337 /* LCOV_EXCL_STOP */
338 }
339 /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */
340
341 /*
342 * We got one; now compute its digest and check that it matches the
343 * filename.
344 */
345 if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size,
346 DIGEST_SHA256) < 0) {
347 /* Weird, but okay */
348 log_info(LD_DIR,
349 "Unable to hash content of %s from unparseable descriptors "
350 "directory", f);
351 tor_unlink(path);
352 /* We're done */
353 goto done;
354 }
355
356 /* Compare the digests */
357 if (tor_memneq(digest, content_digest, DIGEST256_LEN)) {
358 /* No match */
359 log_info(LD_DIR,
360 "Hash of %s from unparseable descriptors directory didn't "
361 "match its filename; removing it", f);
362 tor_unlink(path);
363 /* We're done */
364 goto done;
365 }
366
367 /* Okay, it's a match, we should prepare ent */
368 ent = tor_malloc_zero(sizeof(dumped_desc_t));
369 ent->filename = path;
370 memcpy(ent->digest_sha256, digest, DIGEST256_LEN);
371 ent->len = (size_t) st.st_size;
372 ent->when = st.st_mtime;
373 /* Null out path so we don't free it out from under ent */
374 path = NULL;
375
376 done:
377 /* Free allocations if we had them */
378 tor_free(desc);
379 tor_free(path);
380
381 return ent;
382}
383
384/** Sort helper for dump_desc_populate_fifo_from_directory(); compares
385 * the when field of dumped_desc_ts in a smartlist to put the FIFO in
386 * the correct order after reconstructing it from the directory.
387 */
388static int
389dump_desc_compare_fifo_entries(const void **a_v, const void **b_v)
390{
391 const dumped_desc_t **a = (const dumped_desc_t **)a_v;
392 const dumped_desc_t **b = (const dumped_desc_t **)b_v;
393
394 if ((a != NULL) && (*a != NULL)) {
395 if ((b != NULL) && (*b != NULL)) {
396 /* We have sensible dumped_desc_ts to compare */
397 if ((*a)->when < (*b)->when) {
398 return -1;
399 } else if ((*a)->when == (*b)->when) {
400 return 0;
401 } else {
402 return 1;
403 }
404 } else {
405 /*
406 * We shouldn't see this, but what the hell, NULLs precede everything
407 * else
408 */
409 return 1;
410 }
411 } else {
412 return -1;
413 }
414}
415
416/** Scan the contents of the directory, and update FIFO/counters; this will
417 * consistency-check descriptor dump filenames against hashes of descriptor
418 * dump file content, and remove any inconsistent/unreadable dumps, and then
419 * reconstruct the dump FIFO as closely as possible for the last time the
420 * tor process shut down. If a previous dump was repeated more than once and
421 * moved ahead in the FIFO, the mtime will not have been updated and the
422 * reconstructed order will be wrong, but will always be a permutation of
423 * the original.
424 */
425STATIC void
427{
428 smartlist_t *files = NULL;
429 dumped_desc_t *ent = NULL;
430
431 tor_assert(dirname != NULL);
432
433 /* Get a list of files */
434 files = tor_listdir(dirname);
435 if (!files) {
436 log_notice(LD_DIR,
437 "Unable to get contents of unparseable descriptor dump "
438 "directory %s",
439 dirname);
440 return;
441 }
442
443 /*
444 * Iterate through the list and decide which files should go in the
445 * FIFO and which should be purged.
446 */
447
448 SMARTLIST_FOREACH_BEGIN(files, char *, f) {
449 /* Try to get a FIFO entry */
450 ent = dump_desc_populate_one_file(dirname, f);
451 if (ent) {
452 /*
453 * We got one; add it to the FIFO. No need for duplicate checking
454 * here since we just verified the name and digest match.
455 */
456
457 /* Make sure we have a list to add it to */
458 if (!descs_dumped) {
461 }
462
463 /* Add it and adjust the counter */
465 len_descs_dumped += ent->len;
466 }
467 /*
468 * If we didn't, we will have unlinked the file if necessary and
469 * possible, and emitted a log message about it, so just go on to
470 * the next.
471 */
472 } SMARTLIST_FOREACH_END(f);
473
474 /* Did we get anything? */
475 if (descs_dumped != NULL) {
476 /* Sort the FIFO in order of increasing timestamp */
478
479 /* Log some stats */
480 log_info(LD_DIR,
481 "Reloaded unparseable descriptor dump FIFO with %d dump(s) "
482 "totaling %"PRIu64 " bytes",
483 smartlist_len(descs_dumped), (len_descs_dumped));
484 }
485
486 /* Free the original list */
487 SMARTLIST_FOREACH(files, char *, f, tor_free(f));
488 smartlist_free(files);
489}
490
491/** For debugging purposes, dump unparseable descriptor *<b>desc</b> of
492 * type *<b>type</b> to file $DATADIR/unparseable-desc. Do not write more
493 * than one descriptor to disk per minute. If there is already such a
494 * file in the data directory, overwrite it. */
495MOCK_IMPL(void,
496dump_desc,(const char *desc, const char *type))
497{
498 tor_assert(desc);
499 tor_assert(type);
500#ifndef TOR_UNIT_TESTS
501 /* For now, we are disabling this function, since it can be called with
502 * strings that are far too long. We can turn it back on if we fix it
503 * someday, but we'd need to give it a length argument. A likelier
504 * resolution here is simply to remove this module entirely. See tor#40286
505 * for background. */
506 if (1)
507 return;
508#endif
509 size_t len;
510 /* The SHA256 of the string */
511 uint8_t digest_sha256[DIGEST256_LEN];
512 char digest_sha256_hex[HEX_DIGEST256_LEN+1];
513 /* Filename to log it to */
514 char *debugfile, *debugfile_base;
515
516 /* Get the hash for logging purposes anyway */
517 len = strlen(desc);
518 if (crypto_digest256((char *)digest_sha256, desc, len,
519 DIGEST_SHA256) < 0) {
520 log_info(LD_DIR,
521 "Unable to parse descriptor of type %s, and unable to even hash"
522 " it!", type);
523 goto err;
524 }
525
526 base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex),
527 (const char *)digest_sha256, sizeof(digest_sha256));
528
529 /*
530 * We mention type and hash in the main log; don't clutter up the files
531 * with anything but the exact dump.
532 */
533 tor_asprintf(&debugfile_base,
534 DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex);
535 debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base);
536
537 /*
538 * Check if the sandbox is active or will become active; see comment
539 * below at the log message for why.
540 */
541 if (!(sandbox_is_active() || get_options()->Sandbox)) {
542 if (len <= get_options()->MaxUnparseableDescSizeToLog) {
543 if (!dump_desc_fifo_bump_hash(digest_sha256)) {
544 /* Create the directory if needed */
546 /* Make sure we've got it */
547 if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
548 /* Write it, and tell the main log about it */
549 write_str_to_file(debugfile, desc, 1);
550 log_info(LD_DIR,
551 "Unable to parse descriptor of type %s with hash %s and "
552 "length %lu. See file %s in data directory for details.",
553 type, digest_sha256_hex, (unsigned long)len,
554 debugfile_base);
555 dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len);
556 /* Since we handed ownership over, don't free debugfile later */
557 debugfile = NULL;
558 } else {
559 /* Problem with the subdirectory */
560 log_info(LD_DIR,
561 "Unable to parse descriptor of type %s with hash %s and "
562 "length %lu. Descriptor not dumped because we had a "
563 "problem creating the " DESC_DUMP_DATADIR_SUBDIR
564 " subdirectory",
565 type, digest_sha256_hex, (unsigned long)len);
566 /* We do have to free debugfile in this case */
567 }
568 } else {
569 /* We already had one with this hash dumped */
570 log_info(LD_DIR,
571 "Unable to parse descriptor of type %s with hash %s and "
572 "length %lu. Descriptor not dumped because one with that "
573 "hash has already been dumped.",
574 type, digest_sha256_hex, (unsigned long)len);
575 /* We do have to free debugfile in this case */
576 }
577 } else {
578 /* Just log that it happened without dumping */
579 log_info(LD_DIR,
580 "Unable to parse descriptor of type %s with hash %s and "
581 "length %lu. Descriptor not dumped because it exceeds maximum"
582 " log size all by itself.",
583 type, digest_sha256_hex, (unsigned long)len);
584 /* We do have to free debugfile in this case */
585 }
586 } else {
587 /*
588 * Not logging because the sandbox is active and seccomp2 apparently
589 * doesn't have a sensible way to allow filenames according to a pattern
590 * match. (If we ever figure out how to say "allow writes to /regex/",
591 * remove this checK).
592 */
593 log_info(LD_DIR,
594 "Unable to parse descriptor of type %s with hash %s and "
595 "length %lu. Descriptor not dumped because the sandbox is "
596 "configured",
597 type, digest_sha256_hex, (unsigned long)len);
598 }
599
600 tor_free(debugfile_base);
601 tor_free(debugfile);
602
603 err:
604 return;
605}
int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:506
void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:478
const or_options_t * get_options(void)
Definition: config.c:944
Header file for config.c.
#define HEX_DIGEST256_LEN
Definition: crypto_digest.h:37
int crypto_digest256(char *digest, const char *m, size_t len, digest_algorithm_t algorithm)
int tor_memeq(const void *a, const void *b, size_t sz)
Definition: di_ops.c:107
#define tor_memneq(a, b, sz)
Definition: di_ops.h:21
#define DIGEST256_LEN
Definition: digest_sizes.h:23
int check_private_dir(const char *dirname, cpd_check_t check, const char *effective_user)
Definition: dir.c:71
smartlist_t * tor_listdir(const char *dirname)
Definition: dir.c:307
int write_str_to_file(const char *fname, const char *str, int bin)
Definition: files.c:274
int tor_unlink(const char *pathname)
Definition: files.c:154
#define RFTS_IGNORE_MISSING
Definition: files.h:101
file_status_t file_status(const char *filename)
Definition: files.c:212
#define RFTS_BIN
Definition: files.h:99
#define LD_DIR
Definition: log.h:88
#define tor_free(p)
Definition: malloc.h:56
Master header file for Tor-specific functionality.
int tor_asprintf(char **strp, const char *fmt,...)
Definition: printf.c:75
int sandbox_is_active(void)
Definition: sandbox.c:2348
Header file for sandbox.c.
void smartlist_sort(smartlist_t *sl, int(*compare)(const void **a, const void **b))
Definition: smartlist.c:334
smartlist_t * smartlist_new(void)
void smartlist_add(smartlist_t *sl, void *element)
void smartlist_del_keeporder(smartlist_t *sl, int idx)
#define SMARTLIST_FOREACH_BEGIN(sl, type, var)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
#define SMARTLIST_DEL_CURRENT_KEEPORDER(sl, var)
uint64_t MaxUnparseableDescSizeToLog
#define STATIC
Definition: testsupport.h:32
#define MOCK_IMPL(rv, funcname, arglist)
Definition: testsupport.h:133
STATIC void dump_desc_populate_fifo_from_directory(const char *dirname)
Definition: unparseable.c:426
static void dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256, size_t len)
Definition: unparseable.c:130
STATIC smartlist_t * descs_dumped
Definition: unparseable.c:26
STATIC dumped_desc_t * dump_desc_populate_one_file(const char *dirname, const char *f)
Definition: unparseable.c:260
void dump_desc_fifo_cleanup(void)
Definition: unparseable.c:239
static void dump_desc_create_dir(void)
Definition: unparseable.c:97
static int have_dump_desc_dir
Definition: unparseable.c:30
void dump_desc(const char *desc, const char *type)
Definition: unparseable.c:496
static int dump_desc_compare_fifo_entries(const void **a_v, const void **b_v)
Definition: unparseable.c:389
STATIC uint64_t len_descs_dumped
Definition: unparseable.c:28
static int dump_desc_fifo_bump_hash(const uint8_t *digest_sha256)
Definition: unparseable.c:201
void dump_desc_init(void)
Definition: unparseable.c:38
Header file for unparseable.c.
#define tor_assert(expr)
Definition: util_bug.h:103
int strcmpstart(const char *s1, const char *s2)
Definition: util_string.c:217