1
use std::cell::RefCell;
2
use std::hash::Hash;
3
use std::hash::Hasher;
4
use std::rc::Rc;
5

            
6
use ldd::LddIndex;
7
use ldd::SharedProtectionSet;
8
use merc_collections::IndexedSet;
9
use merc_collections::ProtectionSet;
10

            
11
use crate::operations::height;
12

            
13
mod cache;
14
mod ldd;
15

            
16
pub use self::cache::*;
17
pub use self::ldd::Ldd;
18
pub use self::ldd::LddRef;
19

            
20
pub type Value = u32;
21

            
22
/// This is the LDD node(value, down, right) with some additional meta data.
23
#[derive(Clone)]
24
pub struct Node {
25
    value: Value,
26
    down: LddIndex,
27
    right: LddIndex,
28

            
29
    marked: bool,
30
}
31

            
32
/// Check that the node size has the expected size.
33
#[cfg(not(debug_assertions))]
34
const _: () = assert!(std::mem::size_of::<Node>() == std::mem::size_of::<(usize, usize, usize)>());
35

            
36
impl Node {
37
8479035
    fn new(value: Value, down: LddIndex, right: LddIndex) -> Node {
38
8479035
        Node {
39
8479035
            value,
40
8479035
            down,
41
8479035
            right,
42
8479035
            marked: false,
43
8479035
        }
44
8479035
    }
45

            
46
    /// Returns false if the node has been garbage collected.
47
28650
    pub fn is_valid(&self) -> bool {
48
28650
        true
49
28650
    }
50
}
51

            
52
impl PartialEq for Node {
53
3614071
    fn eq(&self, other: &Self) -> bool {
54
3614071
        self.value == other.value && self.down == other.down && self.right == other.right
55
3614071
    }
56
}
57

            
58
impl Eq for Node {}
59

            
60
impl Hash for Node {
61
8479035
    fn hash<H: Hasher>(&self, state: &mut H) {
62
8479035
        self.value.hash(state);
63
8479035
        self.down.hash(state);
64
8479035
        self.right.hash(state);
65
8479035
    }
66
}
67

            
68
/// This is the user facing data of a [Node].
69
pub struct Data(pub Value, pub Ldd, pub Ldd);
70

            
71
/// This is the user facing data of a [Node] as references.
72
pub struct DataRef<'a>(pub Value, pub LddRef<'a>, pub LddRef<'a>);
73

            
74
/// The storage that implements the maximal sharing behaviour. Meaning that
75
/// identical nodes (same value, down and right) have a unique index in the node
76
/// table. Therefore guaranteeing that Ldds n and m are identical iff their
77
/// indices in the node table match.
78
pub struct Storage {
79
    protection_set: SharedProtectionSet, // Every Ldd points to the underlying protection set.
80
    nodes: IndexedSet<Node>,
81
    cache: OperationCache,
82

            
83
    count_until_collection: u64,     // Count down until the next garbage collection.
84
    enable_garbage_collection: bool, // Whether to enable automatic garbage collection based on heuristics.
85
    enable_performance_metrics: bool,
86
    empty_set: Ldd,
87
    empty_vector: Ldd,
88
}
89

            
90
impl Default for Storage {
91
204
    fn default() -> Self {
92
204
        Self::new()
93
204
    }
94
}
95

            
96
impl Storage {
97
2028
    pub fn new() -> Self {
98
2028
        let shared = Rc::new(RefCell::new(ProtectionSet::new()));
99
        // Add two nodes representing 'false' and 'true' respectively; these cannot be created using insert.
100
2028
        let mut nodes = IndexedSet::new();
101
2028
        let empty_set = nodes.insert(Node::new(0, LddIndex::default(), LddIndex::default())).0;
102
2028
        let empty_vector = nodes.insert(Node::new(1, LddIndex::default(), LddIndex::default())).0;
103

            
104
2028
        Self {
105
2028
            protection_set: shared.clone(),
106
2028
            nodes,
107
2028
            cache: OperationCache::new(Rc::clone(&shared)),
108
2028

            
109
2028
            count_until_collection: 10000,
110
2028
            enable_garbage_collection: true,
111
2028
            enable_performance_metrics: false,
112
2028
            empty_set: Ldd::new(&shared, empty_set),
113
2028
            empty_vector: Ldd::new(&shared, empty_vector),
114
2028
        }
115
2028
    }
116

            
117
    /// Provides access to the underlying operation cache.
118
8985145
    pub fn operation_cache(&mut self) -> &mut OperationCache {
119
8985145
        &mut self.cache
120
8985145
    }
121

            
122
    /// Create a new LDD node(value, down, right)
123
8474979
    pub fn insert(&mut self, value: Value, down: &LddRef, right: &LddRef) -> Ldd {
124
        // These invariants ensure that the result is a valid LDD.
125
8474979
        debug_assert_ne!(down, self.empty_set(), "down node can never be the empty set.");
126
8474979
        debug_assert_ne!(right, self.empty_vector(), "right node can never be the empty vector.");
127
8474979
        debug_assert!(*down.index() < self.nodes.len(), "down node not in table.");
128
8474979
        debug_assert!(*right.index() < self.nodes.len(), "right not not in table.");
129

            
130
8474979
        if right != self.empty_set() {
131
2488309
            debug_assert_eq!(
132
2488309
                height(self, down) + 1,
133
2488309
                height(self, right),
134
                "height of node {} should match the right node {} height.",
135
                down.index(),
136
                right.index()
137
            );
138
2488309
            debug_assert!(value < self.value(right), "value should be less than right node value.");
139
5986670
        }
140

            
141
8474979
        if self.count_until_collection == 0 {
142
            if self.enable_garbage_collection {
143
                self.garbage_collect();
144
            }
145
            self.count_until_collection = self.nodes.len() as u64;
146
8474979
        }
147

            
148
8474979
        let (index, _inserted) = self.nodes.insert(Node::new(value, down.index(), right.index()));
149

            
150
8474979
        Ldd::new(&self.protection_set, index)
151
8474979
    }
152

            
153
    /// Upgrade an [LddRef] to a protected [Ldd] instance.
154
1636561
    pub fn protect(&mut self, ldd: &LddRef) -> Ldd {
155
1636561
        Ldd::new(&self.protection_set, ldd.index())
156
1636561
    }
157

            
158
    /// Cleans up all LDDs that are unreachable from the root LDDs.
159
240
    pub fn garbage_collect(&mut self) {
160
        // Clear the cache since it contains unprotected LDDs, and keep track of size before clearing.
161
240
        let size_of_cache = self.cache.len();
162
240
        self.cache.clear();
163
240
        self.cache.limit(self.nodes.len());
164

            
165
        // Mark all nodes that are (indirect) children of nodes with positive reference count.
166
240
        let mut stack: Vec<LddIndex> = Vec::new();
167
840
        for (_root, index) in self.protection_set.borrow().iter() {
168
840
            mark_node(&mut self.nodes, &mut stack, *index);
169
840
        }
170

            
171
        // Collect all garbage nodes.
172
240
        let mut number_of_collections: usize = 0;
173
224156
        self.nodes.retain_mut(|_, node| {
174
224156
            if node.marked {
175
9590
                debug_assert!(node.is_valid(), "Should never mark a node that is not valid.");
176
9590
                node.marked = false;
177
9590
                true
178
            } else {
179
214566
                number_of_collections += 1;
180
214566
                false
181
            }
182
224156
        });
183

            
184
        // Check whether the direct children of a valid node are valid (this implies that the whole tree is valid if the root is valid).
185
9590
        for (_, node) in &self.nodes {
186
            // Special cases for the empty set and empty vector.
187
9590
            debug_assert!(
188
9590
                *node.down == 0 || self.nodes.get(node.down).is_some(),
189
                "The down node of a valid node must be valid."
190
            );
191
9590
            debug_assert!(
192
9590
                *node.right == 0 || self.nodes.get(node.right).is_some(),
193
                "The right node of a valid node must be valid."
194
            );
195
        }
196

            
197
240
        if self.enable_performance_metrics {
198
            println!(
199
                "Collected {number_of_collections} elements and {} elements remaining",
200
                self.nodes.len()
201
            );
202
            println!("Operation cache contains {size_of_cache} elements");
203
240
        }
204
240
    }
205

            
206
    /// Enables automatic garbage collection, which is enabled by default.
207
    pub fn enable_garbage_collection(&mut self, enabled: bool) {
208
        self.enable_garbage_collection = enabled;
209
    }
210

            
211
    pub fn enable_performance_metrics(&mut self, enabled: bool) {
212
        self.enable_performance_metrics = enabled;
213
    }
214

            
215
    /// The 'false' LDD.
216
191952954
    pub fn empty_set(&self) -> &Ldd {
217
191952954
        &self.empty_set
218
191952954
    }
219

            
220
    /// The 'true' LDD.
221
173337366
    pub fn empty_vector(&self) -> &Ldd {
222
173337366
        &self.empty_vector
223
173337366
    }
224

            
225
    /// The value of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
226
2488309
    pub fn value(&self, ldd: &LddRef) -> Value {
227
2488309
        self.verify_ldd(ldd);
228
2488309
        let node = &self.nodes[ldd.index()];
229
2488309
        node.value
230
2488309
    }
231

            
232
    /// The down of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
233
    pub fn down(&self, ldd: &LddRef) -> Ldd {
234
        self.verify_ldd(ldd);
235
        let node = &self.nodes[ldd.index()];
236
        Ldd::new(&self.protection_set, node.down)
237
    }
238

            
239
    /// The right of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
240
    pub fn right(&self, ldd: &LddRef) -> Ldd {
241
        self.verify_ldd(ldd);
242
        let node = &self.nodes[ldd.index()];
243
        Ldd::new(&self.protection_set, node.right)
244
    }
245

            
246
    /// Returns a Data tuple for the given LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
247
1527150
    pub fn get(&self, ldd: &LddRef) -> Data {
248
1527150
        self.verify_ldd(ldd);
249
1527150
        let node = &self.nodes[ldd.index()];
250
1527150
        Data(
251
1527150
            node.value,
252
1527150
            Ldd::new(&self.protection_set, node.down),
253
1527150
            Ldd::new(&self.protection_set, node.right),
254
1527150
        )
255
1527150
    }
256

            
257
    /// Returns a DataRef tuple for the given LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
258
77523222
    pub fn get_ref<'a>(&self, ldd: &'a LddRef) -> DataRef<'a> {
259
77523222
        self.verify_ldd(ldd);
260
77523222
        let node = &self.nodes[ldd.index()];
261
77523222
        DataRef(node.value, LddRef::new(node.down), LddRef::new(node.right))
262
77523222
    }
263

            
264
    // Asserts whether the given ldd is valid.
265
81538681
    fn verify_ldd(&self, ldd: &LddRef) {
266
81538681
        debug_assert_ne!(ldd, self.empty_set(), "Cannot inspect empty set.");
267
81538681
        debug_assert_ne!(ldd, self.empty_vector(), "Cannot inspect empty vector.");
268
81538681
        debug_assert!(
269
81538681
            self.nodes.get(ldd.index()).is_some(),
270
            "Node {} should not have been garbage collected",
271
            ldd.index()
272
        );
273
81538681
    }
274
}
275

            
276
impl Drop for Storage {
277
2028
    fn drop(&mut self) {
278
2028
        if self.enable_performance_metrics {
279
            println!(
280
                "There were {} insertions into the protection set.",
281
                self.protection_set.borrow().number_of_insertions()
282
            );
283
            println!(
284
                "There were at most {} root variables.",
285
                self.protection_set.borrow().maximum_size()
286
            );
287
            println!("There were at most {} nodes.", self.nodes.capacity());
288
2028
        }
289
2028
    }
290
}
291

            
292
/// Mark all LDDs reachable from the given root index.
293
///
294
/// Reuses the stack for the depth-first exploration.
295
840
fn mark_node(nodes: &mut IndexedSet<Node>, stack: &mut Vec<LddIndex>, root: LddIndex) {
296
840
    stack.push(root);
297
19900
    while let Some(current) = stack.pop() {
298
19060
        let node = &mut nodes[current];
299
19060
        debug_assert!(node.is_valid(), "Should never mark a node that is not valid.");
300
19060
        if node.marked {
301
9470
            continue;
302
        } else {
303
9590
            node.marked = true;
304
9590
            if *current != 0 && *current != 1 {
305
9110
                stack.push(node.down);
306
9110
                stack.push(node.right);
307
9110
            }
308
        }
309
    }
310

            
311
840
    debug_assert!(stack.is_empty(), "When marking finishes the stack should be empty.");
312
840
}
313

            
314
#[cfg(test)]
315
mod tests {
316

            
317
    use super::*;
318
    use crate::operations::singleton;
319
    use crate::test_utility::*;
320

            
321
    use merc_utilities::random_test;
322

            
323
    #[test]
324
1
    fn test_random_garbage_collection_small() {
325
100
        random_test(100, |rng| {
326
100
            let mut storage = Storage::new();
327

            
328
            let _child: Ldd;
329
100
            {
330
100
                // Make sure that this set goes out of scope, but keep a reference to some child ldd.
331
100
                let vector = random_vector(rng, 10, 10);
332
100
                let ldd = singleton(&mut storage, &vector);
333
100

            
334
100
                _child = storage.get(&ldd).1;
335
100
                storage.garbage_collect();
336
100
            }
337

            
338
100
            storage.garbage_collect();
339
100
        });
340
1
    }
341

            
342
    #[test]
343
    #[cfg_attr(miri, ignore)]
344
1
    fn test_random_garbage_collection() {
345
20
        random_test(20, |rng| {
346
20
            let mut storage = Storage::new();
347

            
348
            let _child: Ldd;
349
20
            {
350
20
                // Make sure that this set goes out of scope, but keep a reference to some child ldd.
351
20
                let vector = random_vector_set(rng, 2000, 10, 2);
352
20
                let ldd = from_iter(&mut storage, vector.iter());
353
20

            
354
20
                _child = storage.get(&storage.get(&ldd).1).1;
355
20
                storage.garbage_collect();
356
20
            }
357

            
358
20
            storage.garbage_collect();
359
20
        });
360
1
    }
361
}