equix/bucket_array/
hash.rs

1//! Hash table layer
2//!
3//! This uses the minimal utilities in [`crate::bucket_array::mem`] to build
4//! a slightly higher level data structure that's meant to be used as a hash
5//! table for uniformly distributed keys. The single and paired bucket arrays
6//! from [`crate::bucket_array::mem`] are wrapped into concrete value-only
7//! and key-value hash table stores, and unified again via common traits.
8//!
9//! This introduces new data types and conversions that let us represent one
10//! item key in multiple formats: the full size key, its implied bucket number,
11//! the remaining unused key bits, and a packed version of that remainder.
12
13use crate::bucket_array::mem::{self, BucketArrayMemory};
14use num_traits::{One, WrappingAdd, WrappingNeg, Zero};
15use std::marker::PhantomData;
16use std::ops::{BitAnd, Div, Mul, Not, Range, Rem, Shl, Shr, Sub};
17
18/// Trait for accessing the overall shape of a bucket array
19pub(crate) trait Shape<K: Key> {
20    /// The number of buckets in this array
21    const NUM_BUCKETS: usize;
22
23    /// Get the range of items within a single bucket.
24    fn item_range(&self, bucket: usize) -> Range<usize>;
25
26    /// Get the key divisor: the number of buckets but as a [`Key`] instance.
27    #[inline(always)]
28    fn divisor(&self) -> K {
29        K::from_bucket_index(Self::NUM_BUCKETS)
30    }
31
32    /// Split a wide key into the bucket index and the remaining bits.
33    #[inline(always)]
34    fn split_wide_key(&self, key: K) -> (usize, K) {
35        let divisor = self.divisor();
36        ((key % divisor).into_bucket_index(), (key / divisor))
37    }
38
39    /// Rebuild a wide key from its split components.
40    #[inline(always)]
41    fn join_wide_key(&self, bucket: usize, remainder: K) -> K {
42        let divisor = self.divisor();
43        remainder * divisor + K::from_bucket_index(bucket)
44    }
45}
46
47/// Trait for writing new key/value pairs to a bucket array
48pub(crate) trait Insert<K: Key, V: Copy> {
49    /// Append a new item to its sorting bucket, or return Err(()) if it's full
50    fn insert(&mut self, key: K, value: V) -> Result<(), ()>;
51}
52
53/// Trait for bucket arrays that include storage for keys
54///
55/// Keys are always used to index into the bucket array, but an array may
56/// also optionally include storage for the remaining portion.
57pub(crate) trait KeyLookup<S: KeyStorage<K>, K: Key> {
58    /// Retrieve the stored key remainder bits for this item
59    fn item_stored_key(&self, bucket: usize, item: usize) -> S;
60
61    /// Retrieve the key for a particular item, as a full width key
62    fn item_full_key(&self, bucket: usize, item: usize) -> K;
63}
64
65/// Trait for bucket arrays that include storage for values
66///
67/// Values are opaque data, any [`Copy`] type may be used.
68pub(crate) trait ValueLookup<V: Copy> {
69    /// Retrieve the Value for a particular item
70    fn item_value(&self, bucket: usize, item: usize) -> V;
71}
72
73/// Concrete bucket array with parallel [`BucketArrayMemory`] for key and value
74/// storage
75///
76/// This is the basic data type used for one layer of sorting buckets.
77///
78/// Takes several type parameters so the narrowest possible types can be
79/// chosen for counters, keys, and values. Keys take two types: the 'wide'
80/// version that appears in the API and a 'storage' version that's been
81/// stripped of the data redundant with its bucket position.
82///
83/// The validity of [`BucketArrayMemory`] entries is ensured by the combination
84/// of our mutable ref to the `BucketArrayMemory` itself and our tracking of
85/// bucket counts within the lifetime of that reference.
86pub(crate) struct KeyValueBucketArray<
87    // Lifetime for mutable reference to key storage memory
88    'k,
89    // Lifetime for mutable reference to value storage memory
90    'v,
91    // Number of buckets
92    const N: usize,
93    // Maximum number of items per bucket
94    const CAP: usize,
95    // Type for bucket item counts
96    C: Count,
97    // Full size key type, used in the API and for calculating bucket indices
98    K: Key,
99    // Storage type for keys, potentially smaller than `K`
100    KS: KeyStorage<K>,
101    // Value type
102    V: Copy,
103>(
104    mem::BucketArrayPair<'k, 'v, N, CAP, C, KS, V>,
105    PhantomData<K>,
106);
107
108impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
109    KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
110{
111    /// A new [`KeyValueBucketArray`] wraps two mutable [`BucketArrayMemory`]
112    /// references and adds state to track which items are valid.
113    pub(crate) fn new(
114        key_mem: &'k mut BucketArrayMemory<N, CAP, KS>,
115        value_mem: &'v mut BucketArrayMemory<N, CAP, V>,
116    ) -> Self {
117        Self(mem::BucketArrayPair::new(key_mem, value_mem), PhantomData)
118    }
119
120    /// Keep the counts and the value memory but drop the key memory.
121    ///
122    /// Returns a new [`ValueBucketArray`].
123    pub(crate) fn drop_key_storage(self) -> ValueBucketArray<'v, N, CAP, C, K, V> {
124        ValueBucketArray(self.0.drop_first(), self.1)
125    }
126}
127
128/// Concrete bucket array with a single [`BucketArrayMemory`] for value storage
129///
130/// Keys are used for bucket indexing but the remainder bits are not stored.
131pub(crate) struct ValueBucketArray<
132    // Lifetime for mutable reference to value storage memory
133    'v,
134    // Number of buckets
135    const N: usize,
136    // Maximum number of items per bucket
137    const CAP: usize,
138    // Type for bucket item counts
139    C: Count,
140    // Full size key type, used in the API and for calculating bucket indices
141    K: Key,
142    // Value type
143    V: Copy,
144>(mem::BucketArray<'v, N, CAP, C, V>, PhantomData<K>);
145
146impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy>
147    ValueBucketArray<'v, N, CAP, C, K, V>
148{
149    /// A new [`ValueBucketArray`] wraps one mutable [`BucketArrayMemory`]
150    /// reference and adds a counts array to track which items are valid.
151    pub(crate) fn new(value_mem: &'v mut BucketArrayMemory<N, CAP, V>) -> Self {
152        Self(mem::BucketArray::new(value_mem), PhantomData)
153    }
154}
155
156impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
157    Shape<K> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
158{
159    /// Number of buckets in the array
160    const NUM_BUCKETS: usize = N;
161
162    #[inline(always)]
163    fn item_range(&self, bucket: usize) -> Range<usize> {
164        self.0.item_range(bucket)
165    }
166}
167
168impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy> Shape<K>
169    for ValueBucketArray<'v, N, CAP, C, K, V>
170{
171    /// Number of buckets in the array
172    const NUM_BUCKETS: usize = N;
173
174    #[inline(always)]
175    fn item_range(&self, bucket: usize) -> Range<usize> {
176        self.0.item_range(bucket)
177    }
178}
179
180impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
181    Insert<K, V> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
182{
183    #[inline(always)]
184    fn insert(&mut self, key: K, value: V) -> Result<(), ()> {
185        let (bucket, key_remainder) = self.split_wide_key(key);
186        let key_storage = KS::from_key(key_remainder);
187        self.0.insert(bucket, key_storage, value)
188    }
189}
190
191impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy> Insert<K, V>
192    for ValueBucketArray<'v, N, CAP, C, K, V>
193{
194    #[inline(always)]
195    fn insert(&mut self, key: K, value: V) -> Result<(), ()> {
196        let (bucket, _) = self.split_wide_key(key);
197        self.0.insert(bucket, value)
198    }
199}
200
201impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
202    KeyLookup<KS, K> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
203{
204    #[inline(always)]
205    fn item_stored_key(&self, bucket: usize, item: usize) -> KS {
206        self.0.item_value_first(bucket, item)
207    }
208
209    #[inline(always)]
210    fn item_full_key(&self, bucket: usize, item: usize) -> K {
211        self.join_wide_key(bucket, self.item_stored_key(bucket, item).into_key())
212    }
213}
214
215impl<'k, 'v, const N: usize, const CAP: usize, C: Count, K: Key, KS: KeyStorage<K>, V: Copy>
216    ValueLookup<V> for KeyValueBucketArray<'k, 'v, N, CAP, C, K, KS, V>
217{
218    #[inline(always)]
219    fn item_value(&self, bucket: usize, item: usize) -> V {
220        self.0.item_value_second(bucket, item)
221    }
222}
223
224impl<'v, const N: usize, const CAP: usize, C: Count, K: Key, V: Copy> ValueLookup<V>
225    for ValueBucketArray<'v, N, CAP, C, K, V>
226{
227    #[inline(always)]
228    fn item_value(&self, bucket: usize, item: usize) -> V {
229        self.0.item_value(bucket, item)
230    }
231}
232
233/// Trait for types that can be used as a count of items in a bucket
234///
235/// Whereas [`mem::Count`] is meant to be the minimum for that module's
236/// purposes, this is an extended trait with features needed by the rest of
237/// the crate.
238pub(crate) trait Count: mem::Count + TryFrom<usize> {
239    /// Convert from a usize item index, panic on overflow
240    #[inline(always)]
241    fn from_item_index(i: usize) -> Self {
242        // Omit the original error type, to avoid propagating Debug bounds
243        // for this trait. We might be able to stop doing this once the
244        // associated_type_bounds Rust feature stabilizes.
245        i.try_into()
246            .map_err(|_| ())
247            .expect("Bucket count type is always wide enough for item index")
248    }
249}
250
251impl<T: mem::Count + TryFrom<usize>> Count for T {}
252
253/// Types we can use as full width keys
254pub(crate) trait Key:
255    Copy
256    + Zero
257    + One
258    + PartialEq<Self>
259    + TryFrom<usize>
260    + TryInto<usize>
261    + Shl<usize, Output = Self>
262    + Shr<usize, Output = Self>
263    + Div<Self, Output = Self>
264    + Rem<Self, Output = Self>
265    + Mul<Self, Output = Self>
266    + Not
267    + Sub<Self, Output = Self>
268    + BitAnd<Self, Output = Self>
269    + WrappingAdd
270    + WrappingNeg
271{
272    /// Build a Key from a bucket index.
273    ///
274    /// Panics if the index is out of range.
275    #[inline(always)]
276    fn from_bucket_index(i: usize) -> Self {
277        i.try_into()
278            .map_err(|_| ())
279            .expect("Key type is always wide enough for a bucket index")
280    }
281
282    /// Convert this Key back into a bucket index.
283    ///
284    /// Panics if the value would not fit in a `usize`. No other range
285    /// checking on the bucket index is enforced here.
286    #[inline(always)]
287    fn into_bucket_index(self) -> usize {
288        self.try_into()
289            .map_err(|_| ())
290            .expect("Key is a bucket index which always fits in a usize")
291    }
292
293    /// Check if the N low bits of the key are zero.
294    #[inline(always)]
295    fn low_bits_are_zero(self, num_bits: usize) -> bool {
296        (self & ((Self::one() << num_bits) - Self::one())) == Self::zero()
297    }
298}
299
300impl<
301        T: Copy
302            + Zero
303            + One
304            + PartialEq<Self>
305            + TryFrom<usize>
306            + TryInto<usize>
307            + Shl<usize, Output = Self>
308            + Shr<usize, Output = Self>
309            + Div<Self, Output = Self>
310            + Rem<Self, Output = Self>
311            + Mul<Self, Output = Self>
312            + Not
313            + Sub<Self, Output = Self>
314            + BitAnd<Self, Output = Self>
315            + WrappingAdd
316            + WrappingNeg,
317    > Key for T
318{
319}
320
321/// Backing storage for a specific key type
322///
323/// Intended to be smaller than or equal in size to the full Key type.
324pub(crate) trait KeyStorage<K>:
325    Copy + Zero + Not<Output = Self> + TryFrom<K> + TryInto<K>
326where
327    K: Key,
328{
329    /// Fit the indicated key into a [`KeyStorage`], wrapping if necessary.
330    ///
331    /// It is normal for keys to accumulate additional insignificant bits on
332    /// the left side as we compute sums.
333    #[inline(always)]
334    fn from_key(k: K) -> Self {
335        let key_mask = (!Self::zero()).into_key();
336        <K as TryInto<Self>>::try_into(k & key_mask)
337            .map_err(|_| ())
338            .expect("masked Key type always fits in KeyStorage")
339    }
340
341    /// Unpack this [`KeyStorage`] back into a Key type, without
342    /// changing its value.
343    #[inline(always)]
344    fn into_key(self) -> K {
345        self.try_into()
346            .map_err(|_| ())
347            .expect("Key type is always wider than KeyStorage")
348    }
349}
350
351impl<T: Copy + Zero + Not<Output = Self> + TryFrom<K> + TryInto<K>, K: Key> KeyStorage<K> for T {}