Tor 0.4.9.0-alpha-dev
bloomfilt.c
Go to the documentation of this file.
1/* Copyright (c) 2003-2004, Roger Dingledine
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2021, The Tor Project, Inc. */
4/* See LICENSE for licensing information */
5
6/**
7 * \file bloomfilt.c
8 * \brief Uses bitarray_t to implement a bloom filter.
9 **/
10
11#include <stdlib.h>
12
13#include "lib/malloc/malloc.h"
15#include "lib/intmath/bits.h"
16#include "lib/log/util_bug.h"
17#include "ext/siphash.h"
18
19/** How many bloom-filter bits we set per address. This is twice the
20 * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
21 * values. */
22#define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
23
25 /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
26 * items. */
28 bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */
29 uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always
30 * one less than a power of two. */
31 bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
32};
33
34#define BIT(set, n) ((n) & (set)->mask)
35
36/** Add the element <b>item</b> to <b>set</b>. */
37void
39 const void *item)
40{
41 int i;
42 for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
43 uint64_t h = set->hashfn(&set->key[i], item);
44 uint32_t high_bits = (uint32_t)(h >> 32);
45 uint32_t low_bits = (uint32_t)(h);
46 bitarray_set(set->ba, BIT(set, high_bits));
47 bitarray_set(set->ba, BIT(set, low_bits));
48 }
49}
50
51/** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise,
52 * <em>probably</em> return zero. */
53int
55 const void *item)
56{
57 int i, matches = 0;
58 for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
59 uint64_t h = set->hashfn(&set->key[i], item);
60 uint32_t high_bits = (uint32_t)(h >> 32);
61 uint32_t low_bits = (uint32_t)(h);
62 // Note that !! is necessary here, since bitarray_is_set does not
63 // necessarily return 1 on true.
64 matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
65 matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
66 }
67 return matches == N_BITS_PER_ITEM;
68}
69
70/** Return a newly allocated bloomfilt_t, optimized to hold a total of
71 * <b>max_elements</b> elements with a reasonably low false positive weight.
72 *
73 * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
74 * functions of the items, and the key material <b>random_key</b> to
75 * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
76 **/
78bloomfilt_new(int max_elements,
79 bloomfilt_hash_fn hashfn,
80 const uint8_t *random_key)
81{
82 /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
83 * is the number of hash functions per entry, m is the bits in the array,
84 * and n is the number of elements inserted. For us, k==4, n<=max_elements,
85 * and m==n_bits= approximately max_elements*32. This gives
86 * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
87 *
88 * It would be more optimal in space vs false positives to get this false
89 * positive rate by going for k==13, and m==18.5n, but we also want to
90 * conserve CPU, and k==13 is pretty big.
91 */
92 int n_bits = 1u << (tor_log2(max_elements)+5);
93 bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
94 r->mask = n_bits - 1;
95 r->ba = bitarray_init_zero(n_bits);
96
97 tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
98 memcpy(r->key, random_key, sizeof(r->key));
99
100 r->hashfn = hashfn;
101
102 return r;
103}
104
105/** Free all storage held in <b>set</b>. */
106void
108{
109 if (!set)
110 return;
111 bitarray_free(set->ba);
112 tor_free(set);
113}
unsigned int bitarray_t
Definition: bitarray.h:30
static bitarray_t * bitarray_init_zero(unsigned int n_bits)
Definition: bitarray.h:33
static void bitarray_set(bitarray_t *b, int bit)
Definition: bitarray.h:68
static unsigned int bitarray_is_set(bitarray_t *b, int bit)
Definition: bitarray.h:81
int tor_log2(uint64_t u64)
Definition: bits.c:16
Header for bits.c.
bloomfilt_t * bloomfilt_new(int max_elements, bloomfilt_hash_fn hashfn, const uint8_t *random_key)
Definition: bloomfilt.c:78
void bloomfilt_add(bloomfilt_t *set, const void *item)
Definition: bloomfilt.c:38
#define N_BITS_PER_ITEM
Definition: bloomfilt.c:22
int bloomfilt_probably_contains(const bloomfilt_t *set, const void *item)
Definition: bloomfilt.c:54
void bloomfilt_free_(bloomfilt_t *set)
Definition: bloomfilt.c:107
Header for bloomfilt.c.
#define BLOOMFILT_N_HASHES
Definition: bloomfilt.h:23
#define BLOOMFILT_KEY_LEN
Definition: bloomfilt.h:26
Headers for util_malloc.c.
#define tor_free(p)
Definition: malloc.h:56
#define BIT(x)
Definition: protover.c:176
bloomfilt_hash_fn hashfn
Definition: bloomfilt.c:28
struct sipkey key[BLOOMFILT_N_HASHES]
Definition: bloomfilt.c:27
bitarray_t * ba
Definition: bloomfilt.c:31
uint32_t mask
Definition: bloomfilt.c:29
Definition: siphash.h:6
Macros to manage assertions, fatal and non-fatal.
#define tor_assert(expr)
Definition: util_bug.h:103