1
use std::cmp::Ordering;
2
use std::fmt;
3
use std::marker::PhantomData;
4
use std::slice::Iter;
5

            
6
use itertools::Itertools;
7

            
8
#[macro_export]
9
macro_rules! vecset {
10
    () => {
11
        $crate::VecSet::new()
12
    };
13
    ($elem:expr; $n:expr) => {{
14
        let mut __set = $crate::VecSet::new();
15
        let __count: usize = $n;
16
        if __count > 0 {
17
            __set.insert($elem);
18
        }
19
        __set
20
    }};
21
    ($($x:expr),+ $(,)?) => {{
22
        let mut __set = $crate::VecSet::new();
23
        $( let _ = __set.insert($x); )*
24
        __set
25
    }};
26
}
27

            
28
///
29
/// A set that is internally represented by a sorted vector. Mostly useful for
30
/// a compact representation of sets that are not changed often.
31
///
32
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
33
pub struct VecSet<T> {
34
    /// The internal storage with the invariant that the array is sorted.
35
    sorted_array: Vec<T>,
36
}
37

            
38
impl<T: Ord> VecSet<T> {
39
    /// Creates a new, empty VecSet.
40
12789
    pub fn new() -> Self {
41
12789
        Self {
42
12789
            sorted_array: Vec::new(),
43
12789
        }
44
12789
    }
45

            
46
    /// Creates a VecSet from the given vector without assuming anything of the given vector.
47
32885
    pub fn from_vec(mut vec: Vec<T>) -> Self {
48
32885
        vec.sort_unstable();
49
32885
        vec.dedup();
50
32885
        Self { sorted_array: vec }
51
32885
    }
52

            
53
    /// Returns the capacity of the set.
54
    pub fn capacity(&self) -> usize {
55
        self.sorted_array.capacity()
56
    }
57

            
58
    /// Returns true iff the set contains the given element.
59
    pub fn contains(&self, element: &T) -> bool {
60
        self.sorted_array.binary_search(element).is_ok()
61
    }
62

            
63
    /// Clears the set, removing all elements.
64
    pub fn clear(&mut self) {
65
        self.sorted_array.clear();
66
    }
67

            
68
    /// Retains only the elements specified by the predicate.
69
4602
    pub fn retain<F>(&mut self, mut f: F)
70
4602
    where
71
4602
        F: FnMut(&T) -> bool,
72
    {
73
        // Removing elements does not change the order.
74
11899
        self.sorted_array.retain(|e| f(e));
75
4602
    }
76

            
77
    /// Returns true iff this set is a subset of the other set.
78
58261
    pub fn is_subset(&self, other: &VecSet<T>) -> bool {
79
58261
        let mut self_iter = self.sorted_array.iter();
80
58261
        let mut other_iter = other.sorted_array.iter();
81

            
82
        // Traverse both sets in order, checking that all elements of self are in other.
83
58261
        let mut self_next = self_iter.next();
84
58261
        let mut other_next = other_iter.next();
85

            
86
116080
        while let Some(self_val) = self_next {
87
114003
            match other_next {
88
102344
                Some(other_val) => {
89
102344
                    match self_val.cmp(other_val) {
90
14364
                        Ordering::Equal => {
91
14364
                            self_next = self_iter.next();
92
14364
                            other_next = other_iter.next();
93
14364
                        }
94
43455
                        Ordering::Greater => other_next = other_iter.next(),
95
44525
                        Ordering::Less => return false, // self_val is smaller, not in other
96
                    }
97
                }
98
11659
                None => return false, // other is exhausted
99
            }
100
        }
101

            
102
2077
        true
103
58261
    }
104

            
105
    /// Returns a new set only containing the given element.
106
12656
    pub fn singleton(element: T) -> Self {
107
12656
        Self {
108
12656
            sorted_array: vec![element],
109
12656
        }
110
12656
    }
111

            
112
    /// Returns true iff the set is empty.
113
13882
    pub fn is_empty(&self) -> bool {
114
13882
        self.sorted_array.is_empty()
115
13882
    }
116

            
117
    /// Returns the difference of this set and the other set.
118
2071
    pub fn difference<'a>(&'a self, other: &'a VecSet<T>) -> impl Iterator<Item = &'a T> {
119
2071
        Difference::<'a, T, _> {
120
2071
            self_iter: self.sorted_array.iter(),
121
2071
            other_iter: other.sorted_array.iter(),
122
2071
            other_next: None,
123
2071
            marker: PhantomData,
124
2071
        }
125
2071
    }
126

            
127
    /// Inserts the given element into the set, returns true iff the element was
128
    /// inserted.
129
33992
    pub fn insert(&mut self, element: T) -> bool {
130
        // Finds the location where to insert the element to keep the array sorted.
131
33992
        if let Err(position) = self.sorted_array.binary_search(&element) {
132
32192
            self.sorted_array.insert(position, element);
133
32192
            return true;
134
1800
        }
135

            
136
1800
        false
137
33992
    }
138

            
139
    /// Extends this set with the elements from the given iterator, returns true iff at least one element was inserted.
140
1038
    pub fn extend<'a, I: IntoIterator<Item = &'a T>>(&mut self, iter: I) -> bool
141
1038
    where
142
1038
        T: Clone + 'a,
143
    {
144
1038
        let mut inserted = false;
145
10540
        for element in iter {
146
10540
            if self.insert(element.clone()) {
147
9360
                inserted = true;
148
9360
            }
149
        }
150

            
151
1038
        inserted
152
1038
    }
153

            
154
    /// Returns an iterator over the elements in the set, they are yielded in sorted order.
155
18070
    pub fn iter(&self) -> impl Iterator<Item = &T> {
156
18070
        self.sorted_array.iter()
157
18070
    }
158

            
159
    /// Returns the number of elements in the set.
160
1
    pub fn len(&self) -> usize {
161
1
        self.sorted_array.len()
162
1
    }
163

            
164
    /// Consumes the set and returns a vector with the elements in sorted order.
165
8806
    pub fn to_vec(self) -> Vec<T> {
166
8806
        self.sorted_array
167
8806
    }
168
}
169

            
170
impl<T: Ord> FromIterator<T> for VecSet<T> {
171
23402
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
172
23402
        Self::from_vec(iter.into_iter().collect())
173
23402
    }
174
}
175

            
176
/// A lazy iterator that yields the difference of two sets. The elements are yielded in sorted order.
177
struct Difference<'a, T, I> {
178
    self_iter: I,
179
    other_iter: I,
180
    other_next: Option<&'a T>,
181
    marker: PhantomData<&'a T>,
182
}
183

            
184
impl<'a, T, I> Iterator for Difference<'a, T, I>
185
where
186
    I: Iterator<Item = &'a T>,
187
    T: Ord + PartialEq,
188
{
189
    type Item = &'a T;
190

            
191
9095
    fn next(&mut self) -> Option<Self::Item> {
192
9095
        if self.other_next.is_none() {
193
6676
            self.other_next = self.other_iter.next();
194
7069
        }
195

            
196
9095
        for self_val in self.self_iter.by_ref() {
197
            loop {
198
9388
                match self.other_next {
199
4783
                    Some(other_val) => {
200
4783
                        match self_val.cmp(other_val) {
201
                            Ordering::Equal => {
202
1717
                                self.other_next = self.other_iter.next();
203
1717
                                break;
204
                            }
205
647
                            Ordering::Greater => self.other_next = self.other_iter.next(),
206
2419
                            Ordering::Less => return Some(self_val), // self_val is smaller, yield it
207
                        }
208
                    }
209
4605
                    None => return Some(self_val), // other is exhausted, yield remaining elements of self
210
                }
211
            }
212
        }
213

            
214
2071
        None
215
9095
    }
216
}
217

            
218
impl<T: Ord> Default for VecSet<T> {
219
    fn default() -> Self {
220
        Self::new()
221
    }
222
}
223

            
224
impl<'a, T> IntoIterator for &'a VecSet<T> {
225
    type Item = &'a T;
226
    type IntoIter = Iter<'a, T>;
227

            
228
979
    fn into_iter(self) -> Self::IntoIter {
229
979
        self.sorted_array.iter()
230
979
    }
231
}
232

            
233
impl<T: fmt::Debug> fmt::Debug for VecSet<T> {
234
202
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
235
202
        write!(f, "{{{:?}}}", self.sorted_array.iter().format(", "))
236
202
    }
237
}
238

            
239
#[cfg(test)]
240
mod tests {
241
    use itertools::Itertools;
242
    use rand::RngExt;
243

            
244
    use merc_utilities::random_test;
245

            
246
    use crate::VecSet;
247

            
248
    #[test]
249
1
    fn test_random_vecset_difference() {
250
100
        random_test(100, |rng| {
251
100
            let size = rng.random_range(0..20);
252
100
            let size2 = rng.random_range(0..20);
253
925
            let vec1: Vec<u32> = (0..size).map(|_| rng.random_range(0..100)).collect();
254
956
            let vec2: Vec<u32> = (0..size2).map(|_| rng.random_range(0..100)).collect();
255

            
256
100
            let set1 = VecSet::from_vec(vec1.clone());
257
100
            let set2 = VecSet::from_vec(vec2.clone());
258

            
259
100
            println!("left: {:?}", set1);
260
100
            println!("right: {:?}", set2);
261

            
262
100
            let difference: Vec<u32> = set1.difference(&set2).cloned().collect();
263
100
            let expected_difference: Vec<u32> = vec1
264
100
                .into_iter()
265
925
                .filter(|x| !vec2.contains(x))
266
100
                .sorted()
267
100
                .dedup()
268
100
                .collect();
269

            
270
100
            assert_eq!(difference, expected_difference);
271
100
        })
272
1
    }
273
}