Tor 0.4.9.0-alpha-dev
storagedir.c
Go to the documentation of this file.
1/* Copyright (c) 2017-2021, The Tor Project, Inc. */
2/* See LICENSE for licensing information */
3
4/**
5 * \file storagedir.c
6 *
7 * \brief An abstraction for a directory full of similar files.
8 *
9 * Storagedirs are used by our consensus cache code, and may someday also get
10 * used for unparseable objects. A large part of the need for this type is to
11 * work around the limitations in our sandbox code, where all filenames need
12 * to be registered in advance.
13 **/
14
15#include "lib/fs/storagedir.h"
16
19#include "lib/fs/dir.h"
20#include "lib/fs/files.h"
21#include "lib/fs/mmap.h"
22#include "lib/log/escape.h"
23#include "lib/log/log.h"
24#include "lib/log/util_bug.h"
25#include "lib/malloc/malloc.h"
26#include "lib/memarea/memarea.h"
27#include "lib/sandbox/sandbox.h"
28#include "lib/string/printf.h"
30
31#ifdef HAVE_SYS_TYPES_H
32#include <sys/types.h>
33#endif
34#ifdef HAVE_SYS_STAT_H
35#include <sys/stat.h>
36#endif
37#ifdef HAVE_UNISTD_H
38#include <unistd.h>
39#endif
40#include <stdlib.h>
41#include <errno.h>
42#include <string.h>
43
44#define FNAME_MIN_NUM 1000
45
46/** A storage_dir_t represents a directory full of similar cached
47 * files. Filenames are decimal integers. Files can be cleaned as needed
48 * to limit total disk usage. */
50 /** Directory holding the files for this storagedir. */
51 char *directory;
52 /** Either NULL, or a directory listing of the directory (as a smartlist
53 * of strings */
55 /** The largest number of non-temporary files we'll place in the
56 * directory. */
58 /** If true, then 'usage' has been computed. */
60 /** The total number of bytes used in this directory */
61 uint64_t usage;
62};
63
64/** Create or open a new storage directory at <b>dirname</b>, with
65 * capacity for up to <b>max_files</b> files.
66 */
68storage_dir_new(const char *dirname, int max_files)
69{
70 if (check_private_dir(dirname, CPD_CREATE, NULL) < 0)
71 return NULL;
72
73 storage_dir_t *d = tor_malloc_zero(sizeof(storage_dir_t));
74 d->directory = tor_strdup(dirname);
75 d->max_files = max_files;
76 return d;
77}
78
79/**
80 * Drop all in-RAM storage for <b>d</b>. Does not delete any files.
81 */
82void
84{
85 if (d == NULL)
86 return;
88 if (d->contents) {
89 SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
90 smartlist_free(d->contents);
91 }
92 tor_free(d);
93}
94
95/**
96 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
97 * operations that <b>d</b> will need.
98 *
99 * The presence of this function is why we need an upper limit on the
100 * number of files in a storage_dir_t: we need to approve file operations
101 * one by one.
102 */
103int
105{
106 int problems = 0;
107 int idx;
108 for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) {
109 char *path = NULL, *tmppath = NULL;
110 tor_asprintf(&path, "%s/%d", d->directory, idx);
111 tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx);
112
113 problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(path));
114 problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(tmppath));
115 problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(path));
116 problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(tmppath));
117 problems += sandbox_cfg_allow_rename(cfg,
118 tor_strdup(tmppath), tor_strdup(path));
119
120 tor_free(path);
121 tor_free(tmppath);
122 }
123
124 return problems ? -1 : 0;
125}
126
127/**
128 * Remove all files in <b>d</b> whose names end with ".tmp".
129 *
130 * Requires that the contents field of <b>d</b> is set.
131 */
132static void
134{
135 if (!d->contents)
136 return;
137 SMARTLIST_FOREACH_BEGIN(d->contents, char *, fname) {
138 if (strcmpend(fname, ".tmp"))
139 continue;
140 char *path = NULL;
141 tor_asprintf(&path, "%s/%s", d->directory, fname);
142 if (unlink(sandbox_intern_string(path))) {
143 log_warn(LD_FS, "Unable to unlink %s while cleaning "
144 "temporary files: %s", escaped(path), strerror(errno));
145 tor_free(path);
146 continue;
147 }
148 tor_free(path);
150 tor_free(fname);
151 } SMARTLIST_FOREACH_END(fname);
152
153 d->usage_known = 0;
154}
155
156/**
157 * Re-scan the directory <b>d</b> to learn its contents.
158 */
159static int
161{
162 if (d->contents) {
163 SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
164 smartlist_free(d->contents);
165 }
166 d->usage = 0;
167 d->usage_known = 0;
168 if (NULL == (d->contents = tor_listdir(d->directory))) {
169 return -1;
170 }
172 return 0;
173}
174
175/**
176 * Return a smartlist containing the filenames within <b>d</b>.
177 */
178const smartlist_t *
180{
181 if (! d->contents)
183 return d->contents;
184}
185
186/**
187 * Return the total number of bytes used for storage in <b>d</b>.
188 */
189uint64_t
191{
192 if (d->usage_known)
193 return d->usage;
194
195 uint64_t total = 0;
196 SMARTLIST_FOREACH_BEGIN(storage_dir_list(d), const char *, cp) {
197 char *path = NULL;
198 struct stat st;
199 tor_asprintf(&path, "%s/%s", d->directory, cp);
200 if (stat(sandbox_intern_string(path), &st) == 0) {
201 total += st.st_size;
202 }
203 tor_free(path);
204 } SMARTLIST_FOREACH_END(cp);
205
206 d->usage = total;
207 d->usage_known = 1;
208 return d->usage;
209}
210
211/** Mmap a specified file within <b>d</b>.
212 *
213 * On failure, return NULL and set errno as for tor_mmap_file(). */
215storage_dir_map(storage_dir_t *d, const char *fname)
216{
217 char *path = NULL;
218 tor_asprintf(&path, "%s/%s", d->directory, fname);
219 tor_mmap_t *result = tor_mmap_file(path);
220 int errval = errno;
221 tor_free(path);
222 if (result == NULL)
223 errno = errval;
224 return result;
225}
226
227/** Read a file within <b>d</b> into a newly allocated buffer. Set
228 * *<b>sz_out</b> to its size. */
229uint8_t *
230storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
231{
232 const int flags = bin ? RFTS_BIN : 0;
233
234 char *path = NULL;
235 tor_asprintf(&path, "%s/%s", d->directory, fname);
236 struct stat st;
237 char *contents = read_file_to_str(path, flags, &st);
238 if (contents && sz_out) {
239 // it fits in RAM, so we know its size is less than SIZE_MAX
240#if UINT64_MAX > SIZE_MAX
241 tor_assert((uint64_t)st.st_size <= SIZE_MAX);
242#endif
243 *sz_out = (size_t) st.st_size;
244 }
245
246 tor_free(path);
247 return (uint8_t *) contents;
248}
249
250/** Helper: Find an unused filename within the directory */
251static char *
253{
254 if (!d->contents) {
255 if (storage_dir_rescan(d) < 0)
256 return NULL;
257 }
258
259 char buf[16];
260 int i;
261 /* Yuck; this is quadratic. Fortunately, that shouldn't matter much,
262 * since disk writes are more expensive by a lot. */
263 for (i = FNAME_MIN_NUM; i < FNAME_MIN_NUM + d->max_files; ++i) {
264 tor_snprintf(buf, sizeof(buf), "%d", i);
265 if (!smartlist_contains_string(d->contents, buf)) {
266 return tor_strdup(buf);
267 }
268 }
269 return NULL;
270}
271
272/** Helper: As storage_dir_save_bytes_to_file, but store a smartlist of
273 * sized_chunk_t rather than a single byte array. */
274static int
276 const smartlist_t *chunks,
277 int binary,
278 char **fname_out)
279{
280 uint64_t total_length = 0;
281 char *fname = find_unused_fname(d);
282 if (!fname)
283 return -1;
284
285 SMARTLIST_FOREACH(chunks, const sized_chunk_t *, ch,
286 total_length += ch->len);
287
288 char *path = NULL;
289 tor_asprintf(&path, "%s/%s", d->directory, fname);
290
291 int r = write_chunks_to_file(path, chunks, binary, 0);
292 if (r == 0) {
293 if (d->usage_known)
294 d->usage += total_length;
295 if (fname_out) {
296 *fname_out = tor_strdup(fname);
297 }
298 if (d->contents)
299 smartlist_add(d->contents, tor_strdup(fname));
300 }
301 tor_free(fname);
302 tor_free(path);
303 return r;
304}
305
306/** Try to write the <b>length</b> bytes at <b>data</b> into a new file
307 * in <b>d</b>. On success, return 0 and set *<b>fname_out</b> to a
308 * newly allocated string containing the filename. On failure, return
309 * -1. */
310int
312 const uint8_t *data,
313 size_t length,
314 int binary,
315 char **fname_out)
316{
317 smartlist_t *chunks = smartlist_new();
318 sized_chunk_t chunk = { (const char *)data, length };
319 smartlist_add(chunks, &chunk);
320 int r = storage_dir_save_chunks_to_file(d, chunks, binary, fname_out);
321 smartlist_free(chunks);
322 return r;
323}
324
325/**
326 * As storage_dir_save_bytes_to_file, but saves a NUL-terminated string
327 * <b>str</b>.
328 */
329int
331 const char *str,
332 int binary,
333 char **fname_out)
334{
336 (const uint8_t*)str, strlen(str), binary, fname_out);
337}
338
339/**
340 * As storage_dir_save_bytes_to_file, but associates the data with the
341 * key-value pairs in <b>labels</b>. Files stored in this format can be
342 * recovered with storage_dir_map_labeled() or storage_dir_read_labeled().
343 */
344int
346 const config_line_t *labels,
347 const uint8_t *data,
348 size_t length,
349 char **fname_out)
350{
351 /*
352 * The storage format is to prefix the data with the key-value pairs in
353 * <b>labels</b>, and a single NUL separator. But code outside this module
354 * MUST NOT rely on that format.
355 */
356
357 smartlist_t *chunks = smartlist_new();
358 memarea_t *area = memarea_new();
359 const config_line_t *line;
360 for (line = labels; line; line = line->next) {
361 sized_chunk_t *sz = memarea_alloc(area, sizeof(sized_chunk_t));
362 sz->len = strlen(line->key) + 1 + strlen(line->value) + 1;
363 const size_t allocated = sz->len + 1;
364 char *bytes = memarea_alloc(area, allocated);
365 tor_snprintf(bytes, allocated, "%s %s\n", line->key, line->value);
366 sz->bytes = bytes;
367 smartlist_add(chunks, sz);
368 }
369
370 sized_chunk_t *nul = memarea_alloc(area, sizeof(sized_chunk_t));
371 nul->len = 1;
372 nul->bytes = "\0";
373 smartlist_add(chunks, nul);
374
375 sized_chunk_t *datachunk = memarea_alloc(area, sizeof(sized_chunk_t));
376 datachunk->bytes = (const char *)data;
377 datachunk->len = length;
378 smartlist_add(chunks, datachunk);
379
380 int r = storage_dir_save_chunks_to_file(d, chunks, 1, fname_out);
381 smartlist_free(chunks);
382 memarea_drop_all(area);
383 return r;
384}
385
386/**
387 * Map a file that was created with storage_dir_save_labeled_to_file(). On
388 * failure, return NULL. On success, write a set of newly allocated labels
389 * into *<b>labels_out</b>, a pointer to the data into *<b>data_out</b>, and
390 * the data's size into *<b>sz_out</b>. On success, also return a tor_mmap_t
391 * object whose contents should not be used -- it needs to be kept around,
392 * though, for as long as <b>data_out</b> is going to be valid.
393 *
394 * On failure, set errno as for tor_mmap_file() if the file was missing or
395 * empty, and set errno to EINVAL if the file was not in the labeled
396 * format expected.
397 */
400 const char *fname,
401 config_line_t **labels_out,
402 const uint8_t **data_out,
403 size_t *sz_out)
404{
405 tor_mmap_t *m = storage_dir_map(dir, fname);
406 int errval;
407 if (! m) {
408 errval = errno;
409 goto err;
410 }
411 const char *nulp = memchr(m->data, '\0', m->size);
412 if (! nulp) {
413 errval = EINVAL;
414 goto err;
415 }
416 if (labels_out && config_get_lines(m->data, labels_out, 0) < 0) {
417 errval = EINVAL;
418 goto err;
419 }
420 size_t offset = nulp - m->data + 1;
421 tor_assert(offset <= m->size);
422 *data_out = (const uint8_t *)(m->data + offset);
423 *sz_out = m->size - offset;
424
425 return m;
426 err:
427 tor_munmap_file(m);
428 errno = errval;
429 return NULL;
430}
431
432/** As storage_dir_map_labeled, but return a new byte array containing the
433 * data. */
434uint8_t *
436 const char *fname,
437 config_line_t **labels_out,
438 size_t *sz_out)
439{
440 const uint8_t *data = NULL;
441 tor_mmap_t *m = storage_dir_map_labeled(dir, fname, labels_out,
442 &data, sz_out);
443 if (m == NULL)
444 return NULL;
445 uint8_t *result = tor_memdup(data, *sz_out);
446 tor_munmap_file(m);
447 return result;
448}
449
450/* Reduce the cached usage amount in <b>d</b> by <b>removed_file_size</b>.
451 * This function is a no-op if <b>d->usage_known</b> is 0. */
452static void
453storage_dir_reduce_usage(storage_dir_t *d, uint64_t removed_file_size)
454{
455 if (d->usage_known) {
456 if (! BUG(d->usage < removed_file_size)) {
457 /* This bug can also be triggered if an external process resized a file
458 * between the call to storage_dir_get_usage() that last checked
459 * actual usage (rather than relaying on cached usage), and the call to
460 * this function. */
461 d->usage -= removed_file_size;
462 } else {
463 /* If we underflowed the cached directory size, re-check the sizes of all
464 * the files in the directory. This makes storage_dir_shrink() quadratic,
465 * but only if a process is continually changing file sizes in the
466 * storage directory (in which case, we have bigger issues).
467 *
468 * We can't just reset usage_known, because storage_dir_shrink() relies
469 * on knowing the usage. */
471 (void)storage_dir_get_usage(d);
472 }
473 }
474}
475
476/**
477 * Remove the file called <b>fname</b> from <b>d</b>.
478 */
479void
481 const char *fname)
482{
483 char *path = NULL;
484 tor_asprintf(&path, "%s/%s", d->directory, fname);
485 const char *ipath = sandbox_intern_string(path);
486
487 uint64_t size = 0;
488 if (d->usage_known) {
489 struct stat st;
490 if (stat(ipath, &st) == 0) {
491 size = st.st_size;
492 }
493 }
494 if (unlink(ipath) == 0) {
495 storage_dir_reduce_usage(d, size);
496 } else {
497 log_warn(LD_FS, "Unable to unlink %s while removing file: %s",
498 escaped(path), strerror(errno));
499 tor_free(path);
500 return;
501 }
502 if (d->contents) {
504 }
505
506 tor_free(path);
507}
508
/** Helper type: used to sort the members of storage directory by mtime. */
typedef struct shrinking_dir_entry_t {
  /** Modification time of the file. */
  time_t mtime;
  /** Size of the file, in bytes. */
  uint64_t size;
  /** Path of the file. */
  char *path;
} shrinking_dir_entry_t;

/** Helper: use with qsort to sort shrinking_dir_entry_t structs.
 *
 * Orders entries by ascending mtime.  Return -1, 0, or 1 when the mtime of
 * <b>a_</b> is respectively earlier than, equal to, or later than the mtime
 * of <b>b_</b>. */
static int
shrinking_dir_entry_compare(const void *a_, const void *b_)
{
  const shrinking_dir_entry_t *a = a_;
  const shrinking_dir_entry_t *b = b_;

  /* Each comparison yields 0 or 1, so the difference is -1, 0, or 1 and
   * no subtraction of time_t values can overflow. */
  return (a->mtime > b->mtime) - (a->mtime < b->mtime);
}
530
531/**
532 * Try to free space by removing the oldest files in <b>d</b>. Delete
533 * until no more than <b>target_size</b> bytes are left, and at least
534 * <b>min_to_remove</b> files have been removed... or until there is
535 * nothing left to remove.
536 *
537 * Return 0 on success; -1 on failure.
538 */
539int
541 uint64_t target_size,
542 int min_to_remove)
543{
544 if (d->usage_known && d->usage <= target_size && !min_to_remove) {
545 /* Already small enough. */
546 return 0;
547 }
548
549 if (storage_dir_rescan(d) < 0)
550 return -1;
551
552 const uint64_t orig_usage = storage_dir_get_usage(d);
553 if (orig_usage <= target_size && !min_to_remove) {
554 /* Okay, small enough after rescan! */
555 return 0;
556 }
557
558 const int n = smartlist_len(d->contents);
559 shrinking_dir_entry_t *ents = tor_calloc(n, sizeof(shrinking_dir_entry_t));
560 SMARTLIST_FOREACH_BEGIN(d->contents, const char *, fname) {
561 shrinking_dir_entry_t *ent = &ents[fname_sl_idx];
562 struct stat st;
563 tor_asprintf(&ent->path, "%s/%s", d->directory, fname);
564 if (stat(sandbox_intern_string(ent->path), &st) == 0) {
565 ent->mtime = st.st_mtime;
566 ent->size = st.st_size;
567 }
568 } SMARTLIST_FOREACH_END(fname);
569
570 qsort(ents, n, sizeof(shrinking_dir_entry_t), shrinking_dir_entry_compare);
571
572 int idx = 0;
573 while ((d->usage > target_size || min_to_remove > 0) && idx < n) {
574 if (unlink(sandbox_intern_string(ents[idx].path)) == 0) {
575 storage_dir_reduce_usage(d, ents[idx].size);
576 --min_to_remove;
577 }
578 ++idx;
579 }
580
581 for (idx = 0; idx < n; ++idx) {
582 tor_free(ents[idx].path);
583 }
584 tor_free(ents);
585
587
588 return 0;
589}
590
591/** Remove all files in <b>d</b>. */
592int
594{
595 return storage_dir_shrink(d, 0, d->max_files);
596}
597
598/**
599 * Return the largest number of non-temporary files we're willing to
600 * store in <b>d</b>.
601 */
602int
604{
605 return d->max_files;
606}
int config_get_lines(const char *string, config_line_t **result, int extended)
Definition: confline.c:200
Header for confline.c.
int check_private_dir(const char *dirname, cpd_check_t check, const char *effective_user)
Definition: dir.c:71
smartlist_t * tor_listdir(const char *dirname)
Definition: dir.c:307
Header for dir.c.
const char * escaped(const char *s)
Definition: escape.c:126
Header for escape.c.
Wrappers for reading and writing data to files on disk.
#define RFTS_BIN
Definition: files.h:99
Headers for log.c.
#define LD_FS
Definition: log.h:70
Headers for util_malloc.c.
#define tor_free(p)
Definition: malloc.h:56
void * memarea_alloc(memarea_t *area, size_t sz)
Definition: memarea.c:209
memarea_t * memarea_new(void)
Definition: memarea.c:153
Header for memarea.c.
#define memarea_drop_all(area)
Definition: memarea.h:22
Header for mmap.c.
int tor_asprintf(char **strp, const char *fmt,...)
Definition: printf.c:75
int tor_snprintf(char *str, size_t size, const char *format,...)
Definition: printf.c:27
Header for printf.c.
int sandbox_cfg_allow_open_filename(sandbox_cfg_t **cfg, char *file)
Definition: sandbox.c:2299
int sandbox_cfg_allow_stat_filename(sandbox_cfg_t **cfg, char *file)
Definition: sandbox.c:2320
Header file for sandbox.c.
struct sandbox_cfg_elem_t sandbox_cfg_t
Definition: sandbox.h:35
#define sandbox_intern_string(s)
Definition: sandbox.h:110
void smartlist_string_remove(smartlist_t *sl, const char *element)
Definition: smartlist.c:74
int smartlist_contains_string(const smartlist_t *sl, const char *element)
Definition: smartlist.c:93
Header for smartlist.c.
smartlist_t * smartlist_new(void)
void smartlist_add(smartlist_t *sl, void *element)
#define SMARTLIST_FOREACH_BEGIN(sl, type, var)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
#define SMARTLIST_DEL_CURRENT(sl, var)
static int storage_dir_rescan(storage_dir_t *d)
Definition: storagedir.c:160
tor_mmap_t * storage_dir_map_labeled(storage_dir_t *dir, const char *fname, config_line_t **labels_out, const uint8_t **data_out, size_t *sz_out)
Definition: storagedir.c:399
uint64_t storage_dir_get_usage(storage_dir_t *d)
Definition: storagedir.c:190
static int storage_dir_save_chunks_to_file(storage_dir_t *d, const smartlist_t *chunks, int binary, char **fname_out)
Definition: storagedir.c:275
static int shrinking_dir_entry_compare(const void *a_, const void *b_)
Definition: storagedir.c:518
storage_dir_t * storage_dir_new(const char *dirname, int max_files)
Definition: storagedir.c:68
int storage_dir_save_bytes_to_file(storage_dir_t *d, const uint8_t *data, size_t length, int binary, char **fname_out)
Definition: storagedir.c:311
void storage_dir_free_(storage_dir_t *d)
Definition: storagedir.c:83
int storage_dir_remove_all(storage_dir_t *d)
Definition: storagedir.c:593
uint8_t * storage_dir_read_labeled(storage_dir_t *dir, const char *fname, config_line_t **labels_out, size_t *sz_out)
Definition: storagedir.c:435
static char * find_unused_fname(storage_dir_t *d)
Definition: storagedir.c:252
const smartlist_t * storage_dir_list(storage_dir_t *d)
Definition: storagedir.c:179
uint8_t * storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
Definition: storagedir.c:230
int storage_dir_save_string_to_file(storage_dir_t *d, const char *str, int binary, char **fname_out)
Definition: storagedir.c:330
int storage_dir_save_labeled_to_file(storage_dir_t *d, const config_line_t *labels, const uint8_t *data, size_t length, char **fname_out)
Definition: storagedir.c:345
void storage_dir_remove_file(storage_dir_t *d, const char *fname)
Definition: storagedir.c:480
static void storage_dir_clean_tmpfiles(storage_dir_t *d)
Definition: storagedir.c:133
int storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg)
Definition: storagedir.c:104
int storage_dir_shrink(storage_dir_t *d, uint64_t target_size, int min_to_remove)
Definition: storagedir.c:540
tor_mmap_t * storage_dir_map(storage_dir_t *d, const char *fname)
Definition: storagedir.c:215
int storage_dir_get_max_files(storage_dir_t *d)
Definition: storagedir.c:603
Header for storagedir.c.
Definition: storagedir.c:510
smartlist_t * contents
Definition: storagedir.c:54
uint64_t usage
Definition: storagedir.c:61
char * directory
Definition: storagedir.c:51
size_t size
Definition: mmap.h:27
const char * data
Definition: mmap.h:26
Macros to manage assertions, fatal and non-fatal.
#define tor_assert(expr)
Definition: util_bug.h:103
int strcmpend(const char *s1, const char *s2)
Definition: util_string.c:253
Header for util_string.c.