1
use core::panic;
2
use std::fmt;
3
use std::hash::BuildHasher;
4
use std::hash::Hash;
5
use std::hash::Hasher;
6
use std::ops::Deref;
7
use std::ops::Index;
8
use std::ops::IndexMut;
9

            
10
use hashbrown::Equivalent;
11
use hashbrown::HashSet;
12
use rustc_hash::FxBuildHasher;
13

            
14
use merc_utilities::GenerationCounter;
15
use merc_utilities::GenerationalIndex;
16
use merc_utilities::NoHasherBuilder;
17
use merc_utilities::cast;
18

            
19
/// A type-safe index for use with [IndexedSet]. Uses generational indices in debug builds to assert
20
/// correct usage of indices.
21
#[repr(transparent)]
22
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
23
pub struct SetIndex(GenerationalIndex<usize>);
24

            
25
impl Deref for SetIndex {
26
    type Target = usize;
27

            
28
1103195687
    fn deref(&self) -> &Self::Target {
29
1103195687
        &self.0
30
1103195687
    }
31
}
32

            
33
impl fmt::Debug for SetIndex {
34
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
35
        write!(f, "SetIndex({})", self.0)
36
    }
37
}
38

            
39
impl fmt::Display for SetIndex {
40
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41
        write!(f, "{}", self.0)
42
    }
43
}
44

            
45
/// A set that assigns a unique index to every entry. The returned index can be used to access the inserted entry.
46
pub struct IndexedSet<T, S = FxBuildHasher> {
47
    /// The table of elements, which can be either filled or empty.
48
    table: Vec<IndexSetEntry<T>>,
49
    /// Indexes of the elements in the set, using NoHasher to directly use precomputed hashes.
50
    index: HashSet<IndexEntry, NoHasherBuilder>,
51
    /// A list of free nodes, where the value is the first free node.
52
    free: Option<usize>,
53
    /// The number of generations
54
    generation_counter: GenerationCounter,
55
    /// The hasher used to compute hashes for elements
56
    hasher: S,
57
}
58

            
59
/// An entry in the indexed set, which can either be filled or empty.
60
enum IndexSetEntry<T> {
61
    Filled(T),
62
    Empty(usize),
63
}
64

            
65
impl<T, S: BuildHasher + Default> IndexedSet<T, S> {
66
    /// Creates a new empty IndexedSet with the default hasher.
67
9029
    pub fn new() -> IndexedSet<T, S> {
68
9029
        IndexedSet {
69
9029
            table: Vec::default(),
70
9029
            index: HashSet::with_hasher(NoHasherBuilder),
71
9029
            free: None,
72
9029
            generation_counter: GenerationCounter::new(),
73
9029
            hasher: S::default(),
74
9029
        }
75
9029
    }
76
}
77

            
78
impl<T, S> IndexedSet<T, S> {
79
    /// Creates a new empty IndexedSet with the specified hasher.
80
    pub fn with_hasher(hash_builder: S) -> IndexedSet<T, S> {
81
        IndexedSet {
82
            table: Vec::default(),
83
            index: HashSet::with_hasher(NoHasherBuilder),
84
            free: None,
85
            generation_counter: GenerationCounter::new(),
86
            hasher: hash_builder,
87
        }
88
    }
89

            
90
    /// Returns the number of elements in the set.
91
18241435
    pub fn len(&self) -> usize {
92
18241435
        self.table.len()
93
18241435
    }
94

            
95
    /// Returns true if the set is empty.
96
    pub fn is_empty(&self) -> bool {
97
        self.len() == 0
98
    }
99

            
100
    /// Returns a reference to the element at the given index, if it exists.
101
81554364
    pub fn get(&self, index: SetIndex) -> Option<&T> {
102
81554364
        if let Some(entry) = self.table.get(self.generation_counter.get_index(index.0)) {
103
81554364
            match entry {
104
81554364
                IndexSetEntry::Filled(element) => Some(element),
105
                IndexSetEntry::Empty(_) => None,
106
            }
107
        } else {
108
            None
109
        }
110
81554364
    }
111

            
112
    /// Returns the capacity of the set.
113
    pub fn capacity(&self) -> usize {
114
        self.table.capacity()
115
    }
116

            
117
    /// Returns an iterator over the elements in the set.
118
3540
    pub fn iter(&self) -> Iter<'_, T, S> {
119
3540
        Iter {
120
3540
            reference: self,
121
3540
            index: 0,
122
3540
            generation_counter: &self.generation_counter,
123
3540
        }
124
3540
    }
125
}
126

            
127
impl<T: Clone, S> IndexedSet<T, S> {
128
    /// Returns a vector containing all elements of this indexed set.
129
3100
    pub fn to_vec(&self) -> Vec<T> {
130
15900
        self.iter().map(|(_, entry)| entry.clone()).collect()
131
3100
    }
132
}
133

            
134
impl<T: Hash + Eq, S: BuildHasher> IndexedSet<T, S> {
135
    /// Inserts the given element into the set
136
    ///
137
    /// Returns the corresponding index and a boolean indicating if the element was inserted.
138
    pub fn insert_equiv<'a, Q>(&mut self, value: &'a Q) -> (SetIndex, bool)
139
    where
140
        Q: Hash + Equivalent<T>,
141
        T: From<&'a Q>,
142
    {
143
        let equivalent = IndexValueEquivalent::new(value, &self.hasher, &self.table);
144

            
145
        if let Some(entry) = self.index.get(&equivalent) {
146
            // The element is already in the set, so return the index.
147
            return (SetIndex(self.generation_counter.recall_index(entry.index)), false);
148
        }
149

            
150
        let value: T = value.into();
151
        let hash = self.hasher.hash_one(&value);
152

            
153
        debug_assert_eq!(hash, equivalent.hash(), "Hash values should be the same");
154

            
155
        let index = match self.free {
156
            Some(first) => {
157
                let next = match self.table[first] {
158
                    IndexSetEntry::Empty(x) => x,
159
                    IndexSetEntry::Filled(_) => panic!("The free list contains a filled element"),
160
                };
161

            
162
                if first == next {
163
                    // The list is now empty as its first element points to itself.
164
                    self.free = None;
165
                } else {
166
                    // Update free to be the next element in the list.
167
                    self.free = Some(next);
168
                }
169

            
170
                self.table[first] = IndexSetEntry::Filled(value);
171
                first
172
            }
173
            None => {
174
                // No free positions so insert new.
175
                self.table.push(IndexSetEntry::Filled(value));
176
                self.table.len() - 1
177
            }
178
        };
179

            
180
        self.index.insert(IndexEntry::new(index, hash));
181
        (SetIndex(self.generation_counter.create_index(index)), true)
182
    }
183

            
184
    /// Inserts the given element into the set
185
    ///
186
    /// Returns the corresponding index and a boolean indicating if the element was inserted.
187
9531410
    pub fn insert(&mut self, value: T) -> (SetIndex, bool) {
188
9531410
        let equivalent = IndexValueEquivalent::new(&value, &self.hasher, &self.table);
189

            
190
9531410
        if let Some(entry) = self.index.get(&equivalent) {
191
            // The element is already in the set, so return the index.
192
3670221
            return (SetIndex(self.generation_counter.recall_index(entry.index)), false);
193
5861189
        }
194

            
195
5861189
        let hash = equivalent.hash();
196

            
197
5861189
        let index = match self.free {
198
            Some(first) => {
199
                let next = match self.table[first] {
200
                    IndexSetEntry::Empty(x) => x,
201
                    IndexSetEntry::Filled(_) => panic!("The free list contains a filled element"),
202
                };
203

            
204
                if first == next {
205
                    // The list is now empty as its first element points to itself.
206
                    self.free = None;
207
                } else {
208
                    // Update free to be the next element in the list.
209
                    self.free = Some(next);
210
                }
211

            
212
                self.table[first] = IndexSetEntry::Filled(value);
213
                first
214
            }
215
            None => {
216
                // No free positions so insert new.
217
5861189
                self.table.push(IndexSetEntry::Filled(value));
218
5861189
                self.table.len() - 1
219
            }
220
        };
221

            
222
5861189
        self.index.insert(IndexEntry::new(index, hash));
223
5861189
        (SetIndex(self.generation_counter.create_index(index)), true)
224
9531410
    }
225

            
226
    /// Returns the index for the given element, or None if it does not exist.
227
1092573
    pub fn index<Q>(&self, key: &Q) -> Option<SetIndex>
228
1092573
    where
229
1092573
        Q: Hash + Equivalent<T>,
230
    {
231
1092573
        let equivalent = IndexValueEquivalent::new(key, &self.hasher, &self.table);
232

            
233
1092573
        self.index
234
1092573
            .get(&equivalent)
235
1092573
            .map(|entry| SetIndex(self.generation_counter.recall_index(entry.index)))
236
1092573
    }
237

            
238
    /// Erases all elements for which f(index, element) returns false. Allows
239
    /// modifying the given element (as long as the hash/equality does not change).
240
240
    pub fn retain_mut<F>(&mut self, mut f: F)
241
240
    where
242
240
        F: FnMut(SetIndex, &mut T) -> bool,
243
    {
244
435140
        for (index, element) in self.table.iter_mut().enumerate() {
245
435140
            if let IndexSetEntry::Filled(value) = element {
246
224156
                if !f(SetIndex(self.generation_counter.recall_index(index)), value) {
247
                    // Find and remove the IndexEntry from the index set
248
584480637
                    let entry_to_remove = self.index.iter().find(|entry| entry.index == index).cloned();
249

            
250
214566
                    if let Some(entry) = entry_to_remove {
251
214566
                        self.index.remove(&entry);
252
214566
                    }
253

            
254
214566
                    match self.free {
255
214446
                        Some(next) => {
256
214446
                            *element = IndexSetEntry::Empty(next);
257
214446
                        }
258
120
                        None => {
259
120
                            *element = IndexSetEntry::Empty(index);
260
120
                        }
261
                    };
262
214566
                    self.free = Some(index);
263
9590
                }
264
210984
            };
265
        }
266
240
    }
267

            
268
    /// Removes the given element from the set.
269
1000
    pub fn remove(&mut self, element: &T) -> bool {
270
1000
        let equivalent = IndexValueEquivalent::new(element, &self.hasher, &self.table);
271

            
272
1000
        if let Some(entry) = self.index.take(&equivalent) {
273
868
            let next = match self.free {
274
768
                Some(next) => next,
275
100
                None => entry.index,
276
            };
277

            
278
868
            self.table[entry.index] = IndexSetEntry::Empty(next);
279
868
            self.free = Some(entry.index);
280
868
            true
281
        } else {
282
            // The element was not found in the set.
283
132
            false
284
        }
285
1000
    }
286

            
287
    /// Returns true iff the set contains the given element.
288
1861717
    pub fn contains<Q>(&self, element: &Q) -> bool
289
1861717
    where
290
1861717
        Q: Hash + Equivalent<T>,
291
    {
292
        // Compute the hash using our hash_builder
293
1861717
        let equivalent = IndexValueEquivalent::new(element, &self.hasher, &self.table);
294
1861717
        self.index.contains(&equivalent)
295
1861717
    }
296
}
297

            
298
impl<T, S> fmt::Debug for IndexedSet<T, S>
299
where
300
    T: fmt::Debug,
301
{
302
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
303
        f.debug_list().entries(self.iter()).finish()
304
    }
305
}
306

            
307
impl<T, S: BuildHasher + Default> Default for IndexedSet<T, S> {
308
100
    fn default() -> IndexedSet<T, S> {
309
100
        IndexedSet::new()
310
100
    }
311
}
312

            
313
impl<T, S> Index<SetIndex> for IndexedSet<T, S> {
314
    type Output = T;
315

            
316
81538681
    fn index(&self, index: SetIndex) -> &Self::Output {
317
81538681
        cast!(&self.table[*index], IndexSetEntry::Filled)
318
81538681
    }
319
}
320

            
321
impl<T, S: BuildHasher> IndexMut<SetIndex> for IndexedSet<T, S> {
322
19060
    fn index_mut(&mut self, index: SetIndex) -> &mut Self::Output {
323
19060
        cast!(&mut self.table[*index], IndexSetEntry::Filled)
324
19060
    }
325
}
326

            
327
/// An entry in the index that stores both the index and a precomputed hash value
328
#[derive(Copy, Clone, PartialEq, Eq)]
329
struct IndexEntry {
330
    /// The index into the table
331
    index: usize,
332
    /// Precomputed hash value of the element at this index
333
    hash: u64,
334
}
335

            
336
impl IndexEntry {
337
    /// Creates a new IndexEntry with the given index and hash value
338
64293811
    fn new(index: usize, hash: u64) -> Self {
339
64293811
        Self { index, hash }
340
64293811
    }
341
}
342

            
343
impl Hash for IndexEntry {
344
15932757
    fn hash<H: Hasher>(&self, state: &mut H) {
345
        // Simply use the precomputed hash value
346
15932757
        state.write_u64(self.hash);
347
15932757
    }
348
}
349

            
350
/// An equivalent wrapper that allows looking up elements in a set using the original value.
351
/// This avoids duplicating the key in both the table and index.
352
struct IndexValueEquivalent<'a, T, Q> {
353
    value: &'a Q,
354
    hash: u64,
355
    table: &'a Vec<IndexSetEntry<T>>,
356
}
357

            
358
impl<T, Q> IndexValueEquivalent<'_, T, Q> {
359
5861189
    fn hash(&self) -> u64 {
360
        // This is a placeholder for the actual hash function
361
5861189
        self.hash
362
5861189
    }
363
}
364

            
365
impl<'a, T, Q: Hash> IndexValueEquivalent<'a, T, Q> {
366
    /// Creates a new IndexValueEquivalent with the given value and table.
367
12486700
    fn new<S: BuildHasher>(value: &'a Q, hasher: &S, table: &'a Vec<IndexSetEntry<T>>) -> Self {
368
        // Constructor allows for centralized creation logic
369
12486700
        Self {
370
12486700
            value,
371
12486700
            table,
372
12486700
            hash: hasher.hash_one(value),
373
12486700
        }
374
12486700
    }
375
}
376

            
377
impl<T, Q: Equivalent<T>> Equivalent<IndexEntry> for IndexValueEquivalent<'_, T, Q> {
378
6080210
    fn equivalent(&self, key: &IndexEntry) -> bool {
379
6080210
        if let Some(IndexSetEntry::Filled(element)) = self.table.get(key.index) {
380
6080210
            self.value.equivalent(element)
381
        } else {
382
            false
383
        }
384
6080210
    }
385
}
386

            
387
impl<T, Q> Hash for IndexValueEquivalent<'_, T, Q> {
388
12473163
    fn hash<H: Hasher>(&self, state: &mut H) {
389
12473163
        state.write_u64(self.hash);
390
12473163
    }
391
}
392

            
393
/// An iterator over the elements in the IndexedSet.
394
pub struct Iter<'a, T, S> {
395
    reference: &'a IndexedSet<T, S>,
396
    index: usize,
397
    generation_counter: &'a GenerationCounter,
398
}
399

            
400
impl<'a, T, S> Iterator for Iter<'a, T, S> {
401
    type Item = (SetIndex, &'a T);
402

            
403
34298
    fn next(&mut self) -> Option<Self::Item> {
404
460716
        while self.index < self.reference.table.len() {
405
457176
            let current_index = self.index;
406
457176
            self.index += 1;
407

            
408
457176
            if let IndexSetEntry::Filled(element) = &self.reference.table[current_index] {
409
30758
                return Some((SetIndex(self.generation_counter.recall_index(current_index)), element));
410
426418
            }
411
        }
412

            
413
3540
        None
414
34298
    }
415
}
416

            
417
impl<'a, T, S> IntoIterator for &'a IndexedSet<T, S> {
418
    type Item = (SetIndex, &'a T);
419
    type IntoIter = Iter<'a, T, S>;
420

            
421
440
    fn into_iter(self) -> Self::IntoIter {
422
440
        self.iter()
423
440
    }
424
}
425

            
426
#[cfg(test)]
427
mod tests {
428
    use std::collections::HashMap;
429

            
430
    use rand::Rng;
431

            
432
    use merc_utilities::random_test;
433

            
434
    use crate::IndexedSet;
435
    use crate::SetIndex;
436

            
437
    #[test]
438
1
    fn test_random_indexed_set_construction() {
439
100
        random_test(100, |rng| {
440
100
            let mut input = vec![];
441
10000
            for _ in 0..100 {
442
10000
                input.push(rng.random_range(0..32) as usize);
443
10000
            }
444

            
445
100
            let mut indices: HashMap<usize, SetIndex> = HashMap::default();
446

            
447
            // Insert several elements and keep track of the resulting indices.
448
100
            let mut set: IndexedSet<usize> = IndexedSet::default();
449
10000
            for element in &input {
450
10000
                let index = set.insert(*element).0;
451
10000
                indices.insert(*element, index);
452
10000
            }
453

            
454
            // Check if the indices match the previously stored ones.
455
3068
            for (index, value) in &set {
456
3068
                assert_eq!(
457
3068
                    indices[value], index,
458
                    "The resulting index does not match the returned value"
459
                );
460
            }
461

            
462
            // Remove some elements from the set.
463
1000
            for value in &mut input.iter().take(10) {
464
1000
                set.remove(value);
465
1000
                indices.remove(value);
466
1000
            }
467

            
468
            // Check consistency of the indexed set after removals.
469
2200
            for (index, value) in &set {
470
2200
                assert_eq!(
471
2200
                    indices[value], index,
472
                    "The resulting index does not match the returned value"
473
                );
474
            }
475

            
476
2200
            for (value, index) in &indices {
477
2200
                assert!(
478
2200
                    set.get(*index) == Some(value),
479
                    "Index {} should still match element {:?}",
480
                    *index,
481
                    value
482
                );
483
            }
484

            
485
            // Check the contains function
486
10000
            for value in &input {
487
10000
                let contains = indices.contains_key(value);
488
10000
                assert_eq!(
489
10000
                    set.contains(value),
490
                    contains,
491
                    "The contains function returned an incorrect result for value {:?}",
492
                    value
493
                );
494
            }
495
100
        })
496
1
    }
497
}