1
//! Hash table layer
2
//!
3
//! This uses the minimal utilities in [`crate::bucket_array::mem`] to build
4
//! a slightly higher level data structure that's meant to be used as a hash
5
//! table for uniformly distributed keys. The single and paired bucket arrays
6
//! from [`crate::bucket_array::mem`] are wrapped into concrete value-only
7
//! and key-value hash table stores, and unified again via common traits.
8
//!
9
//! This introduces new data types and conversions that let us represent one
10
//! item key in multiple formats: the full size key, its implied bucket number,
11
//! the remaining unused key bits, and a packed version of that remainder.
12

            
13
use crate::bucket_array::mem::{self, BucketArrayMemory};
14
use num_traits::{One, WrappingAdd, WrappingNeg, Zero};
15
use std::marker::PhantomData;
16
use std::ops::{BitAnd, Div, Mul, Not, Range, Rem, Shl, Shr, Sub};
17

            
18
/// Trait for accessing the overall shape of a bucket array
19
pub(crate) trait Shape<K: Key> {
20
    /// The number of buckets in this array
21
    const NUM_BUCKETS: usize;
22

            
23
    /// Get the range of items within a single bucket.
24
    fn item_range(&self, bucket: usize) -> Range<usize>;
25

            
26
    /// Get the key divisor: the number of buckets but as a [`Key`] instance.
27
    #[inline(always)]
28
2653614186
    fn divisor(&self) -> K {
29
2653614186
        K::from_bucket_index(Self::NUM_BUCKETS)
30
2653614186
    }
31

            
32
    /// Split a wide key into the bucket index and the remaining bits.
33
    #[inline(always)]
34
1506373519
    fn split_wide_key(&self, key: K) -> (usize, K) {
35
1506373519
        let divisor = self.divisor();
36
1506373519
        ((key % divisor).into_bucket_index(), (key / divisor))
37
1506373519
    }
38

            
39
    /// Rebuild a wide key from its split components.
40
    #[inline(always)]
41
567301425
    fn join_wide_key(&self, bucket: usize, remainder: K) -> K {
42
567301425
        let divisor = self.divisor();
43
567301425
        remainder * divisor + K::from_bucket_index(bucket)
44
567301425
    }
45
}
46

            
47
/// Trait for writing new key/value pairs to a bucket array
48
pub(crate) trait Insert<K: Key, V: Copy> {
49
    /// Append a new item to its sorting bucket, or return Err(()) if it's full
50
    fn insert(&mut self, key: K, value: V) -> Result<(), ()>;
51
}
52

            
53
/// Trait for bucket arrays that include storage for keys
54
///
55
/// Keys are always used to index into the bucket array, but an array may
56
/// also optionally include storage for the remaining portion.
57
pub(crate) trait KeyLookup<S: KeyStorage<K>, K: Key> {
58
    /// Retrieve the stored key remainder bits for this item
59
    fn item_stored_key(&self, bucket: usize, item: usize) -> S;
60

            
61
    /// Retrieve the key for a particular item, as a full width key
62
    fn item_full_key(&self, bucket: usize, item: usize) -> K;
63
}
64

            
65
/// Trait for bucket arrays that include storage for values
66
///
67
/// Values are opaque data, any [`Copy`] type may be used.
68
pub(crate) trait ValueLookup<V: Copy> {
69
    /// Retrieve the Value for a particular item
70
    fn item_value(&self, bucket: usize, item: usize) -> V;
71
}
72

            
73
/// Concrete bucket array with parallel [`BucketArrayMemory`] for key and value
74
/// storage
75
///
76
/// This is the basic data type used for one layer of sorting buckets.
77
///
78
/// Takes several type parameters so the narrowest possible types can be
79
/// chosen for counters, keys, and values. Keys take two types: the 'wide'
80
/// version that appears in the API and a 'storage' version that's been
81
/// stripped of the data redundant with its bucket position.
82
///
83
/// The validity of [`BucketArrayMemory`] entries is ensured by the combination
84
/// of our mutable ref to the `BucketArrayMemory` itself and our tracking of
85
/// bucket counts within the lifetime of that reference.
86
pub(crate) struct KeyValueBucketArray<
87
    // Lifetime for mutable reference to key storage memory
88
    'k,
89
    // Lifetime for mutable reference to value storage memory
90
    'v,
91
    // Number of buckets
92
    const N: usize,
93
    // Maximum number of items per bucket
94
    const CAP: usize,
95
    // Type for bucket item counts
96
    C: Count,
97
    // Full size key type, used in the API and for calculating bucket indices
98
    K: Key,
99
    // Storage type for keys, potentially smaller than `K`
100
    KS: KeyStorage<K>,
101
    // Value type
102
    V: Copy,
103
>(
104
    mem::BucketArrayPair<'k, 'v, N, CAP, C, KS, V>,
105
    PhantomData<K>,
106
);
107

            
108
impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
109
    KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
110
{
111
    /// A new [`KeyValueBucketArray`] wraps two mutable [`BucketArrayMemory`]
112
    /// references and adds state to track which items are valid.
113
14175
    pub(crate) fn new(
114
14175
        key_mem: &'k mut BucketArrayMemory<N, CAP, KS>,
115
14175
        value_mem: &'v mut BucketArrayMemory<N, CAP, V>,
116
14175
    ) -> Self {
117
14175
        Self(mem::BucketArrayPair::new(key_mem, value_mem), PhantomData)
118
14175
    }
119

            
120
    /// Keep the counts and the value memory but drop the key memory.
121
    ///
122
    /// Returns a new [`ValueBucketArray`].
123
4725
    pub(crate) fn drop_key_storage(self) -> ValueBucketArray<'v, N, CAP, C, K, V> {
124
4725
        ValueBucketArray(self.0.drop_first(), self.1)
125
4725
    }
126
}
127

            
128
/// Concrete bucket array with a single [`BucketArrayMemory`] for value storage
129
///
130
/// Keys are used for bucket indexing but the remainder bits are not stored.
131
pub(crate) struct ValueBucketArray<
132
    // Lifetime for mutable reference to value storage memory
133
    'v,
134
    // Number of buckets
135
    const N: usize,
136
    // Maximum number of items per bucket
137
    const CAP: usize,
138
    // Type for bucket item counts
139
    C: Count,
140
    // Full size key type, used in the API and for calculating bucket indices
141
    K: Key,
142
    // Value type
143
    V: Copy,
144
>(mem::BucketArray<'v, N, CAP, C, V>, PhantomData<K>);
145

            
146
impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy>
147
    ValueBucketArray<'v, N, CAP, C, K, V>
148
{
149
    /// A new [`ValueBucketArray`] wraps one mutable [`BucketArrayMemory`]
150
    /// reference and adds a counts array to track which items are valid.
151
1828575
    pub(crate) fn new(value_mem: &'v mut BucketArrayMemory<N, CAP, V>) -> Self {
152
1828575
        Self(mem::BucketArray::new(value_mem), PhantomData)
153
1828575
    }
154
}
155

            
156
impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
157
    Shape<K> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
158
{
159
    /// Number of buckets in the array
160
    const NUM_BUCKETS: usize = N;
161

            
162
    #[inline(always)]
163
1451250
    fn item_range(&self, bucket: usize) -> Range<usize> {
164
1451250
        self.0.item_range(bucket)
165
1451250
    }
166
}
167

            
168
impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy> Shape<K>
169
    for ValueBucketArray<'v, N, CAP, C, K, V>
170
{
171
    /// Number of buckets in the array
172
    const NUM_BUCKETS: usize = N;
173

            
174
    #[inline(always)]
175
187697075
    fn item_range(&self, bucket: usize) -> Range<usize> {
176
187697075
        self.0.item_range(bucket)
177
187697075
    }
178
}
179

            
180
impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
181
    Insert<K, V> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
182
{
183
    #[inline(always)]
184
742056082
    fn insert(&mut self, key: K, value: V) -> Result<(), ()> {
185
742056082
        let (bucket, key_remainder) = self.split_wide_key(key);
186
742056082
        let key_storage = KS::from_key(key_remainder);
187
742056082
        self.0.insert(bucket, key_storage, value)
188
742056082
    }
189
}
190

            
191
impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy> Insert<K, V>
192
    for ValueBucketArray<'v, N, CAP, C, K, V>
193
{
194
    #[inline(always)]
195
187758500
    fn insert(&mut self, key: K, value: V) -> Result<(), ()> {
196
187758500
        let (bucket, _) = self.split_wide_key(key);
197
187758500
        self.0.insert(bucket, value)
198
187758500
    }
199
}
200

            
201
impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
202
    KeyLookup<KS, K> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
203
{
204
    #[inline(always)]
205
755059925
    fn item_stored_key(&self, bucket: usize, item: usize) -> KS {
206
755059925
        self.0.item_value_first(bucket, item)
207
755059925
    }
208

            
209
    #[inline(always)]
210
567301425
    fn item_full_key(&self, bucket: usize, item: usize) -> K {
211
567301425
        self.join_wide_key(bucket, self.item_stored_key(bucket, item).into_key())
212
567301425
    }
213
}
214

            
215
impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
216
    ValueLookup<V> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
217
{
218
    #[inline(always)]
219
65844
    fn item_value(&self, bucket: usize, item: usize) -> V {
220
65844
        self.0.item_value_second(bucket, item)
221
65844
    }
222
}
223

            
224
impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy> ValueLookup<V>
225
    for ValueBucketArray<'v, N, CAP, C, K, V>
226
{
227
    #[inline(always)]
228
956690754
    fn item_value(&self, bucket: usize, item: usize) -> V {
229
956690754
        self.0.item_value(bucket, item)
230
956690754
    }
231
}
232

            
233
/// Trait for types that can be used as a count of items in a bucket
234
///
235
/// Whereas [`mem::Count`] is meant to be the minimum for that module's
236
/// purposes, this is an extended trait with features needed by the rest of
237
/// the crate.
238
pub(crate) trait Count: mem::Count + TryFrom<usize> {
239
    /// Convert from a usize item index, panic on overflow
240
    #[inline(always)]
241
187758500
    fn from_item_index(i: usize) -> Self {
242
187758500
        // Omit the original error type, to avoid propagating Debug bounds
243
187758500
        // for this trait. We might be able to stop doing this once the
244
187758500
        // associated_type_bounds Rust feature stabilizes.
245
187758500
        i.try_into()
246
187758500
            .map_err(|_| ())
247
187758500
            .expect("Bucket count type is always wide enough for item index")
248
187758500
    }
249
}
250

            
251
impl<T: mem::Count + TryFrom<usize>> Count for T {}
252

            
253
/// Types we can use as full width keys
254
pub(crate) trait Key:
255
    Copy
256
    + Zero
257
    + One
258
    + PartialEq<Self>
259
    + TryFrom<usize>
260
    + TryInto<usize>
261
    + Shl<usize, Output = Self>
262
    + Shr<usize, Output = Self>
263
    + Div<Self, Output = Self>
264
    + Rem<Self, Output = Self>
265
    + Mul<Self, Output = Self>
266
    + Not
267
    + Sub<Self, Output = Self>
268
    + BitAnd<Self, Output = Self>
269
    + WrappingAdd
270
    + WrappingNeg
271
{
272
    /// Build a Key from a bucket index.
273
    ///
274
    /// Panics if the index is out of range.
275
    #[inline(always)]
276
5217164253
    fn from_bucket_index(i: usize) -> Self {
277
5217164253
        i.try_into()
278
5217164253
            .map_err(|_| ())
279
5217164253
            .expect("Key type is always wide enough for a bucket index")
280
5217164253
    }
281

            
282
    /// Convert this Key back into a bucket index.
283
    ///
284
    /// Panics if the value would not fit in a `usize`. No other range
285
    /// checking on the bucket index is enforced here.
286
    #[inline(always)]
287
2357965071
    fn into_bucket_index(self) -> usize {
288
2357965071
        self.try_into()
289
2357965071
            .map_err(|_| ())
290
2357965071
            .expect("Key is a bucket index which always fits in a usize")
291
2357965071
    }
292

            
293
    /// Check if the N low bits of the key are zero.
294
    #[inline(always)]
295
379604350
    fn low_bits_are_zero(self, num_bits: usize) -> bool {
296
379604350
        (self & ((Self::one() << num_bits) - Self::one())) == Self::zero()
297
379604350
    }
298
}
299

            
300
impl<
301
        T: Copy
302
            + Zero
303
            + One
304
            + PartialEq<Self>
305
            + TryFrom<usize>
306
            + TryInto<usize>
307
            + Shl<usize, Output = Self>
308
            + Shr<usize, Output = Self>
309
            + Div<Self, Output = Self>
310
            + Rem<Self, Output = Self>
311
            + Mul<Self, Output = Self>
312
            + Not
313
            + Sub<Self, Output = Self>
314
            + BitAnd<Self, Output = Self>
315
            + WrappingAdd
316
            + WrappingNeg,
317
    > Key for T
318
{
319
}
320

            
321
/// Backing storage for a specific key type
322
///
323
/// Intended to be smaller than or equal in size to the full Key type.
324
pub(crate) trait KeyStorage<K>:
325
    Copy + Zero + Not<Output = Self> + TryFrom<K> + TryInto<K>
326
where
327
    K: Key,
328
{
329
    /// Fit the indicated key into a [`KeyStorage`], wrapping if necessary.
330
    ///
331
    /// It is normal for keys to accumulate additional insignificant bits on
332
    /// the left side as we compute sums.
333
    #[inline(always)]
334
933787299
    fn from_key(k: K) -> Self {
335
933787299
        let key_mask = (!Self::zero()).into_key();
336
933787299
        <K as TryInto<Self>>::try_into(k & key_mask)
337
933787299
            .map_err(|_| ())
338
933787299
            .expect("masked Key type always fits in KeyStorage")
339
933787299
    }
340

            
341
    /// Unpack this [`KeyStorage`] back into a Key type, without
342
    /// changing its value.
343
    #[inline(always)]
344
2841571404
    fn into_key(self) -> K {
345
2841571404
        self.try_into()
346
2841571404
            .map_err(|_| ())
347
2841571404
            .expect("Key type is always wider than KeyStorage")
348
2841571404
    }
349
}
350

            
351
impl<T: Copy + Zero + Not<Output = Self> + TryFrom<K> + TryInto<K>, K: Key> KeyStorage<K> for T {}