1
use std::cell::RefCell;
2
use std::hash::Hash;
3
use std::hash::Hasher;
4
use std::rc::Rc;
5

            
6
use ldd::LddIndex;
7
use ldd::SharedProtectionSet;
8
use merc_collections::IndexedSet;
9
use merc_collections::ProtectionSet;
10

            
11
use crate::operations::height;
12

            
13
mod cache;
14
mod ldd;
15

            
16
pub use self::cache::*;
17
pub use self::ldd::Ldd;
18
pub use self::ldd::LddRef;
19

            
20
pub type Value = u32;
21

            
22
/// This is the LDD node(value, down, right) with some additional meta data.
23
#[derive(Clone)]
24
pub struct Node {
25
    value: Value,
26
    down: LddIndex,
27
    right: LddIndex,
28

            
29
    marked: bool,
30
}
31

            
32
/// Check that the node size has the expected size.
33
#[cfg(not(debug_assertions))]
34
const _: () = assert!(std::mem::size_of::<Node>() == std::mem::size_of::<(usize, usize, usize)>());
35

            
36
impl Node {
37
12445244
    fn new(value: Value, down: LddIndex, right: LddIndex) -> Node {
38
12445244
        Node {
39
12445244
            value,
40
12445244
            down,
41
12445244
            right,
42
12445244
            marked: false,
43
12445244
        }
44
12445244
    }
45

            
46
    /// Returns false if the node has been garbage collected.
47
28647
    pub fn is_valid(&self) -> bool {
48
28647
        true
49
28647
    }
50
}
51

            
52
impl PartialEq for Node {
53
5616280
    fn eq(&self, other: &Self) -> bool {
54
5616280
        self.value == other.value && self.down == other.down && self.right == other.right
55
5616280
    }
56
}
57

            
58
impl Eq for Node {}
59

            
60
impl Hash for Node {
61
12445244
    fn hash<H: Hasher>(&self, state: &mut H) {
62
12445244
        self.value.hash(state);
63
12445244
        self.down.hash(state);
64
12445244
        self.right.hash(state);
65
12445244
    }
66
}
67

            
68
/// This is the user facing data of a [Node].
69
pub struct Data(pub Value, pub Ldd, pub Ldd);
70

            
71
/// This is the user facing data of a [Node] as references.
72
pub struct DataRef<'a>(pub Value, pub LddRef<'a>, pub LddRef<'a>);
73

            
74
/// The storage that implements the maximal sharing behaviour. Meaning that
75
/// identical nodes (same value, down and right) have a unique index in the node
76
/// table. Therefore guaranteeing that Ldds n and m are identical iff their
77
/// indices in the node table match.
78
pub struct Storage {
79
    protection_set: SharedProtectionSet, // Every Ldd points to the underlying protection set.
80
    nodes: IndexedSet<Node>,
81
    cache: OperationCache,
82

            
83
    count_until_collection: u64,     // Count down until the next garbage collection.
84
    enable_garbage_collection: bool, // Whether to enable automatic garbage collection based on heuristics.
85
    enable_performance_metrics: bool,
86
    empty_set: Ldd,
87
    empty_vector: Ldd,
88
}
89

            
90
impl Default for Storage {
91
    fn default() -> Self {
92
        Self::new()
93
    }
94
}
95

            
96
impl Storage {
97
2632
    pub fn new() -> Self {
98
2632
        let shared = Rc::new(RefCell::new(ProtectionSet::new()));
99
        // Add two nodes representing 'false' and 'true' respectively; these cannot be created using insert.
100
2632
        let mut nodes = IndexedSet::new();
101
2632
        let empty_set = nodes.insert(Node::new(0, LddIndex::default(), LddIndex::default())).0;
102
2632
        let empty_vector = nodes.insert(Node::new(1, LddIndex::default(), LddIndex::default())).0;
103

            
104
2632
        Self {
105
2632
            protection_set: shared.clone(),
106
2632
            nodes,
107
2632
            cache: OperationCache::new(Rc::clone(&shared)),
108
2632

            
109
2632
            count_until_collection: 10000,
110
2632
            enable_garbage_collection: true,
111
2632
            enable_performance_metrics: false,
112
2632
            empty_set: Ldd::new(&shared, empty_set),
113
2632
            empty_vector: Ldd::new(&shared, empty_vector),
114
2632
        }
115
2632
    }
116

            
117
    /// Provides access to the underlying operation cache.
118
20919768
    pub fn operation_cache(&mut self) -> &mut OperationCache {
119
20919768
        &mut self.cache
120
20919768
    }
121

            
122
    /// Create a new LDD singleton(value)
123
12432
    pub fn insert_singleton(&mut self, value: Value) -> Ldd {
124
12432
        if self.count_until_collection == 0 {
125
            if self.enable_garbage_collection {
126
                self.garbage_collect();
127
            }
128
            self.count_until_collection = self.nodes.len() as u64;
129
12432
        }
130

            
131
12432
        let (index, _inserted) = self.nodes.insert(Node::new(value, self.empty_vector().index(), self.empty_set().index()));
132

            
133
12432
        Ldd::new(&self.protection_set, index)
134
12432
    }
135

            
136
    /// Create a new LDD node(value, down, right)
137
12427548
    pub fn insert(&mut self, value: Value, down: &LddRef, right: &LddRef) -> Ldd {
138
        // These invariants ensure that the result is a valid LDD.
139
12427548
        debug_assert_ne!(down, self.empty_set(), "down node can never be the empty set.");
140
12427548
        debug_assert_ne!(right, self.empty_vector(), "right node can never be the empty vector.");
141
12427548
        debug_assert!(*down.index() < self.nodes.len(), "down node not in table.");
142
12427548
        debug_assert!(*right.index() < self.nodes.len(), "right not not in table.");
143

            
144
12427548
        if right != self.empty_set() {
145
4430465
            debug_assert_eq!(
146
4430465
                height(self, down) + 1,
147
4430465
                height(self, right),
148
                "height of node {} should match the right node {} height.",
149
                down.index(),
150
                right.index()
151
            );
152
4430465
            debug_assert!(value < self.value(right), "value should be less than right node value.");
153
7997083
        }
154

            
155
12427548
        if self.count_until_collection == 0 {
156
            if self.enable_garbage_collection {
157
                self.garbage_collect();
158
            }
159
            self.count_until_collection = self.nodes.len() as u64;
160
12427548
        }
161

            
162
12427548
        let (index, _inserted) = self.nodes.insert(Node::new(value, down.index(), right.index()));
163

            
164
12427548
        Ldd::new(&self.protection_set, index)
165
12427548
    }
166

            
167
    /// Upgrade an [LddRef] to a protected [Ldd] instance.
168
4560951
    pub fn protect(&self, ldd: &LddRef) -> Ldd {
169
4560951
        Ldd::new(&self.protection_set, ldd.index())
170
4560951
    }
171

            
172
    /// Cleans up all LDDs that are unreachable from the root LDDs.
173
240
    pub fn garbage_collect(&mut self) {
174
        // Clear the cache since it contains unprotected LDDs, and keep track of size before clearing.
175
240
        let size_of_cache = self.cache.len();
176
240
        self.cache.clear();
177
240
        self.cache.limit(self.nodes.len());
178

            
179
        // Mark all nodes that are (indirect) children of nodes with positive reference count.
180
240
        let mut stack: Vec<LddIndex> = Vec::new();
181
840
        for (_root, index) in self.protection_set.borrow().iter() {
182
840
            mark_node(&mut self.nodes, &mut stack, *index);
183
840
        }
184

            
185
        // Collect all garbage nodes.
186
240
        let mut number_of_collections: usize = 0;
187
223784
        self.nodes.retain_mut(|_, node| {
188
223784
            if node.marked {
189
9589
                debug_assert!(node.is_valid(), "Should never mark a node that is not valid.");
190
9589
                node.marked = false;
191
9589
                true
192
            } else {
193
214195
                number_of_collections += 1;
194
214195
                false
195
            }
196
223784
        });
197

            
198
        // Check whether the direct children of a valid node are valid (this implies that the whole tree is valid if the root is valid).
199
9589
        for (_, node) in &self.nodes {
200
            // Special cases for the empty set and empty vector.
201
9589
            debug_assert!(
202
9589
                *node.down == 0 || self.nodes.get(node.down).is_some(),
203
                "The down node of a valid node must be valid."
204
            );
205
9589
            debug_assert!(
206
9589
                *node.right == 0 || self.nodes.get(node.right).is_some(),
207
                "The right node of a valid node must be valid."
208
            );
209
        }
210

            
211
240
        if self.enable_performance_metrics {
212
            println!(
213
                "Collected {number_of_collections} elements and {} elements remaining",
214
                self.nodes.len()
215
            );
216
            println!("Operation cache contains {size_of_cache} elements");
217
240
        }
218
240
    }
219

            
220
    /// Enables automatic garbage collection, which is enabled by default.
221
    pub fn enable_garbage_collection(&mut self, enabled: bool) {
222
        self.enable_garbage_collection = enabled;
223
    }
224

            
225
    pub fn enable_performance_metrics(&mut self, enabled: bool) {
226
        self.enable_performance_metrics = enabled;
227
    }
228

            
229
    /// The 'false' LDD.
230
298982250
    pub fn empty_set(&self) -> &Ldd {
231
298982250
        &self.empty_set
232
298982250
    }
233

            
234
    /// The 'true' LDD.
235
255369520
    pub fn empty_vector(&self) -> &Ldd {
236
255369520
        &self.empty_vector
237
255369520
    }
238

            
239
    /// The value of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
240
4430465
    pub fn value(&self, ldd: &LddRef) -> Value {
241
4430465
        self.verify_ldd(ldd);
242
4430465
        let node = &self.nodes[ldd.index()];
243
4430465
        node.value
244
4430465
    }
245

            
246
    /// The down of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
247
    pub fn down(&self, ldd: &LddRef) -> Ldd {
248
        self.verify_ldd(ldd);
249
        let node = &self.nodes[ldd.index()];
250
        Ldd::new(&self.protection_set, node.down)
251
    }
252

            
253
    /// The right of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
254
    pub fn right(&self, ldd: &LddRef) -> Ldd {
255
        self.verify_ldd(ldd);
256
        let node = &self.nodes[ldd.index()];
257
        Ldd::new(&self.protection_set, node.right)
258
    }
259

            
260
    /// Returns a Data tuple for the given LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
261
1090956
    pub fn get(&self, ldd: &LddRef) -> Data {
262
1090956
        self.verify_ldd(ldd);
263
1090956
        let node = &self.nodes[ldd.index()];
264
1090956
        Data(
265
1090956
            node.value,
266
1090956
            Ldd::new(&self.protection_set, node.down),
267
1090956
            Ldd::new(&self.protection_set, node.right),
268
1090956
        )
269
1090956
    }
270

            
271
    /// Returns a DataRef tuple for the given LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
272
117615202
    pub fn get_ref<'a>(&self, ldd: &'a LddRef) -> DataRef<'a> {
273
117615202
        self.verify_ldd(ldd);
274
117615202
        let node = &self.nodes[ldd.index()];
275
117615202
        DataRef(node.value, LddRef::new(node.down), LddRef::new(node.right))
276
117615202
    }
277

            
278
    // Asserts whether the given ldd is valid.
279
123136623
    fn verify_ldd(&self, ldd: &LddRef) {
280
123136623
        debug_assert_ne!(ldd, self.empty_set(), "Cannot inspect empty set.");
281
123136623
        debug_assert_ne!(ldd, self.empty_vector(), "Cannot inspect empty vector.");
282
123136623
        debug_assert!(
283
123136623
            self.nodes.get(ldd.index()).is_some(),
284
            "Node {} should not have been garbage collected",
285
            ldd.index()
286
        );
287
123136623
    }
288
}
289

            
290
impl Drop for Storage {
291
2632
    fn drop(&mut self) {
292
2632
        if self.enable_performance_metrics {
293
            println!(
294
                "There were {} insertions into the protection set.",
295
                self.protection_set.borrow().number_of_insertions()
296
            );
297
            println!(
298
                "There were at most {} root variables.",
299
                self.protection_set.borrow().maximum_size()
300
            );
301
            println!("There were at most {} nodes.", self.nodes.capacity());
302
2632
        }
303
2632
    }
304
}
305

            
306
/// Mark all LDDs reachable from the given root index.
307
///
308
/// Reuses the stack for the depth-first exploration.
309
840
fn mark_node(nodes: &mut IndexedSet<Node>, stack: &mut Vec<LddIndex>, root: LddIndex) {
310
840
    stack.push(root);
311
19898
    while let Some(current) = stack.pop() {
312
19058
        let node = &mut nodes[current];
313
19058
        debug_assert!(node.is_valid(), "Should never mark a node that is not valid.");
314
19058
        if node.marked {
315
9469
            continue;
316
        } else {
317
9589
            node.marked = true;
318
9589
            if *current != 0 && *current != 1 {
319
9109
                stack.push(node.down);
320
9109
                stack.push(node.right);
321
9109
            }
322
        }
323
    }
324

            
325
840
    debug_assert!(stack.is_empty(), "When marking finishes the stack should be empty.");
326
840
}
327

            
328
#[cfg(test)]
329
mod tests {
330

            
331
    use super::*;
332
    use crate::operations::singleton;
333
    use crate::test_utility::*;
334

            
335
    use merc_utilities::random_test;
336

            
337
    #[test]
338
1
    fn test_random_garbage_collection_small() {
339
100
        random_test(100, |rng| {
340
100
            let mut storage = Storage::new();
341

            
342
            let _child: Ldd;
343
100
            {
344
100
                // Make sure that this set goes out of scope, but keep a reference to some child ldd.
345
100
                let vector = random_vector(rng, 10, 10);
346
100
                let ldd = singleton(&mut storage, &vector);
347
100

            
348
100
                _child = storage.get(&ldd).1;
349
100
                storage.garbage_collect();
350
100
            }
351

            
352
100
            storage.garbage_collect();
353
100
        });
354
1
    }
355

            
356
    #[test]
357
    #[cfg_attr(miri, ignore)]
358
1
    fn test_random_garbage_collection() {
359
20
        random_test(20, |rng| {
360
20
            let mut storage = Storage::new();
361

            
362
            let _child: Ldd;
363
20
            {
364
20
                // Make sure that this set goes out of scope, but keep a reference to some child ldd.
365
20
                let vector = random_vector_set(rng, 2000, 10, 2);
366
20
                let ldd = from_iter(&mut storage, vector.iter());
367
20

            
368
20
                _child = storage.get(&storage.get(&ldd).1).1;
369
20
                storage.garbage_collect();
370
20
            }
371

            
372
20
            storage.garbage_collect();
373
20
        });
374
1
    }
375
}