1
use std::cell::RefCell;
2
use std::hash::Hash;
3
use std::hash::Hasher;
4
use std::rc::Rc;
5

            
6
use ldd::LddIndex;
7
use ldd::SharedProtectionSet;
8
use merc_collections::IndexedSet;
9
use merc_collections::ProtectionSet;
10

            
11
use crate::operations::height;
12

            
13
mod cache;
14
mod ldd;
15

            
16
pub use self::cache::*;
17
pub use self::ldd::Ldd;
18
pub use self::ldd::LddRef;
19

            
20
pub type Value = u32;
21

            
22
/// This is the LDD node(value, down, right) with some additional meta data.
23
#[derive(Clone)]
24
pub struct Node {
25
    value: Value,
26
    down: LddIndex,
27
    right: LddIndex,
28

            
29
    marked: bool,
30
}
31

            
32
/// Check that the node size has the expected size.
33
#[cfg(not(debug_assertions))]
34
const _: () = assert!(std::mem::size_of::<Node>() == std::mem::size_of::<(usize, usize, usize)>());
35

            
36
impl Node {
37
13206056
    fn new(value: Value, down: LddIndex, right: LddIndex) -> Node {
38
13206056
        Node {
39
13206056
            value,
40
13206056
            down,
41
13206056
            right,
42
13206056
            marked: false,
43
13206056
        }
44
13206056
    }
45

            
46
    /// Returns false if the node has been garbage collected.
47
28791
    pub fn is_valid(&self) -> bool {
48
28791
        true
49
28791
    }
50
}
51

            
52
impl PartialEq for Node {
53
5796296
    fn eq(&self, other: &Self) -> bool {
54
5796296
        self.value == other.value && self.down == other.down && self.right == other.right
55
5796296
    }
56
}
57

            
58
impl Eq for Node {}
59

            
60
impl Hash for Node {
61
13206056
    fn hash<H: Hasher>(&self, state: &mut H) {
62
13206056
        self.value.hash(state);
63
13206056
        self.down.hash(state);
64
13206056
        self.right.hash(state);
65
13206056
    }
66
}
67

            
68
/// This is the user facing data of a [Node].
69
pub struct Data(pub Value, pub Ldd, pub Ldd);
70

            
71
/// This is the user facing data of a [Node] as references.
72
pub struct DataRef<'a>(pub Value, pub LddRef<'a>, pub LddRef<'a>);
73

            
74
/// The storage that implements the maximal sharing behaviour. Meaning that
75
/// identical nodes (same value, down and right) have a unique index in the node
76
/// table. Therefore guaranteeing that Ldds n and m are identical iff their
77
/// indices in the node table match.
78
pub struct Storage {
79
    protection_set: SharedProtectionSet, // Every Ldd points to the underlying protection set.
80
    nodes: IndexedSet<Node>,
81
    cache: OperationCache,
82

            
83
    count_until_collection: u64,     // Count down until the next garbage collection.
84
    enable_garbage_collection: bool, // Whether to enable automatic garbage collection based on heuristics.
85
    enable_performance_metrics: bool,
86
    empty_set: Ldd,
87
    empty_vector: Ldd,
88
}
89

            
90
impl Default for Storage {
91
    fn default() -> Self {
92
        Self::new()
93
    }
94
}
95

            
96
impl Storage {
97
2932
    pub fn new() -> Self {
98
2932
        let shared = Rc::new(RefCell::new(ProtectionSet::new()));
99
        // Add two nodes representing 'false' and 'true' respectively; these cannot be created using insert.
100
2932
        let mut nodes = IndexedSet::new();
101
2932
        let empty_set = nodes.insert(Node::new(0, LddIndex::default(), LddIndex::default())).0;
102
2932
        let empty_vector = nodes.insert(Node::new(1, LddIndex::default(), LddIndex::default())).0;
103

            
104
2932
        Self {
105
2932
            protection_set: shared.clone(),
106
2932
            nodes,
107
2932
            cache: OperationCache::new(Rc::clone(&shared)),
108
2932

            
109
2932
            count_until_collection: 10000,
110
2932
            enable_garbage_collection: true,
111
2932
            enable_performance_metrics: false,
112
2932
            empty_set: Ldd::new(&shared, empty_set),
113
2932
            empty_vector: Ldd::new(&shared, empty_vector),
114
2932
        }
115
2932
    }
116

            
117
    /// Provides access to the underlying operation cache.
118
21663682
    pub fn operation_cache(&mut self) -> &mut OperationCache {
119
21663682
        &mut self.cache
120
21663682
    }
121

            
122
    /// Create a new LDD singleton(value)
123
12447
    pub fn insert_singleton(&mut self, value: Value) -> Ldd {
124
12447
        if self.count_until_collection == 0 {
125
            if self.enable_garbage_collection {
126
                self.garbage_collect();
127
            }
128
            self.count_until_collection = self.nodes.len() as u64;
129
12447
        }
130

            
131
12447
        let (index, _inserted) =
132
12447
            self.nodes
133
12447
                .insert(Node::new(value, self.empty_vector().index(), self.empty_set().index()));
134

            
135
12447
        Ldd::new(&self.protection_set, index)
136
12447
    }
137

            
138
    /// Create a new LDD node(value, down, right)
139
13187745
    pub fn insert(&mut self, value: Value, down: &LddRef, right: &LddRef) -> Ldd {
140
        // These invariants ensure that the result is a valid LDD.
141
13187745
        debug_assert_ne!(down, self.empty_set(), "down node can never be the empty set.");
142
13187745
        debug_assert_ne!(right, self.empty_vector(), "right node can never be the empty vector.");
143
13187745
        debug_assert!(*down.index() < self.nodes.len(), "down node not in table.");
144
13187745
        debug_assert!(*right.index() < self.nodes.len(), "right not not in table.");
145

            
146
13187745
        if right != self.empty_set() {
147
4584806
            debug_assert_eq!(
148
4584806
                height(self, down) + 1,
149
4584806
                height(self, right),
150
                "height of node {} should match the right node {} height.",
151
                down.index(),
152
                right.index()
153
            );
154
4584806
            debug_assert!(value < self.value(right), "value should be less than right node value.");
155
8602939
        }
156

            
157
13187745
        if self.count_until_collection == 0 {
158
            if self.enable_garbage_collection {
159
                self.garbage_collect();
160
            }
161
            self.count_until_collection = self.nodes.len() as u64;
162
13187745
        }
163

            
164
13187745
        let (index, _inserted) = self.nodes.insert(Node::new(value, down.index(), right.index()));
165

            
166
13187745
        Ldd::new(&self.protection_set, index)
167
13187745
    }
168

            
169
    /// Upgrade an [LddRef] to a protected [Ldd] instance.
170
6136688
    pub fn protect(&self, ldd: &LddRef) -> Ldd {
171
6136688
        Ldd::new(&self.protection_set, ldd.index())
172
6136688
    }
173

            
174
    /// Cleans up all LDDs that are unreachable from the root LDDs.
175
240
    pub fn garbage_collect(&mut self) {
176
        // Clear the cache since it contains unprotected LDDs, and keep track of size before clearing.
177
240
        let size_of_cache = self.cache.len();
178
240
        self.cache.clear();
179
240
        self.cache.limit(self.nodes.len());
180

            
181
        // Mark all nodes that are (indirect) children of nodes with positive reference count.
182
240
        let mut stack: Vec<LddIndex> = Vec::new();
183
840
        for (_root, index) in self.protection_set.borrow().iter() {
184
840
            mark_node(&mut self.nodes, &mut stack, *index);
185
840
        }
186

            
187
        // Collect all garbage nodes.
188
240
        let mut number_of_collections: usize = 0;
189
223013
        self.nodes.retain_mut(|_, node| {
190
223013
            if node.marked {
191
9637
                debug_assert!(node.is_valid(), "Should never mark a node that is not valid.");
192
9637
                node.marked = false;
193
9637
                true
194
            } else {
195
213376
                number_of_collections += 1;
196
213376
                false
197
            }
198
223013
        });
199

            
200
        // Check whether the direct children of a valid node are valid (this implies that the whole tree is valid if the root is valid).
201
9637
        for (_, node) in &self.nodes {
202
            // Special cases for the empty set and empty vector.
203
9637
            debug_assert!(
204
9637
                *node.down == 0 || self.nodes.get(node.down).is_some(),
205
                "The down node of a valid node must be valid."
206
            );
207
9637
            debug_assert!(
208
9637
                *node.right == 0 || self.nodes.get(node.right).is_some(),
209
                "The right node of a valid node must be valid."
210
            );
211
        }
212

            
213
240
        if self.enable_performance_metrics {
214
            println!(
215
                "Collected {number_of_collections} elements and {} elements remaining",
216
                self.nodes.len()
217
            );
218
            println!("Operation cache contains {size_of_cache} elements");
219
240
        }
220
240
    }
221

            
222
    /// Enables automatic garbage collection, which is enabled by default.
223
    pub fn enable_garbage_collection(&mut self, enabled: bool) {
224
        self.enable_garbage_collection = enabled;
225
    }
226

            
227
    pub fn enable_performance_metrics(&mut self, enabled: bool) {
228
        self.enable_performance_metrics = enabled;
229
    }
230

            
231
    /// The 'false' LDD.
232
313806524
    pub fn empty_set(&self) -> &Ldd {
233
313806524
        &self.empty_set
234
313806524
    }
235

            
236
    /// The 'true' LDD.
237
266934962
    pub fn empty_vector(&self) -> &Ldd {
238
266934962
        &self.empty_vector
239
266934962
    }
240

            
241
    /// The value of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
242
4584806
    pub fn value(&self, ldd: &LddRef) -> Value {
243
4584806
        self.verify_ldd(ldd);
244
4584806
        let node = &self.nodes[ldd.index()];
245
4584806
        node.value
246
4584806
    }
247

            
248
    /// The down of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
249
    pub fn down(&self, ldd: &LddRef) -> Ldd {
250
        self.verify_ldd(ldd);
251
        let node = &self.nodes[ldd.index()];
252
        Ldd::new(&self.protection_set, node.down)
253
    }
254

            
255
    /// The right of an LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
256
    pub fn right(&self, ldd: &LddRef) -> Ldd {
257
        self.verify_ldd(ldd);
258
        let node = &self.nodes[ldd.index()];
259
        Ldd::new(&self.protection_set, node.right)
260
    }
261

            
262
    /// Returns a Data tuple for the given LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
263
1091577
    pub fn get(&self, ldd: &LddRef) -> Data {
264
1091577
        self.verify_ldd(ldd);
265
1091577
        let node = &self.nodes[ldd.index()];
266
1091577
        Data(
267
1091577
            node.value,
268
1091577
            Ldd::new(&self.protection_set, node.down),
269
1091577
            Ldd::new(&self.protection_set, node.right),
270
1091577
        )
271
1091577
    }
272

            
273
    /// Returns a DataRef tuple for the given LDD node(value, down, right). Note, ldd cannot be 'true' or 'false.
274
123237122
    pub fn get_ref<'a>(&self, ldd: &'a LddRef) -> DataRef<'a> {
275
123237122
        self.verify_ldd(ldd);
276
123237122
        let node = &self.nodes[ldd.index()];
277
123237122
        DataRef(node.value, LddRef::new(node.down), LddRef::new(node.right))
278
123237122
    }
279

            
280
    // Asserts whether the given ldd is valid.
281
128913505
    fn verify_ldd(&self, ldd: &LddRef) {
282
128913505
        debug_assert_ne!(ldd, self.empty_set(), "Cannot inspect empty set.");
283
128913505
        debug_assert_ne!(ldd, self.empty_vector(), "Cannot inspect empty vector.");
284
128913505
        debug_assert!(
285
128913505
            self.nodes.get(ldd.index()).is_some(),
286
            "Node {} should not have been garbage collected",
287
            ldd.index()
288
        );
289
128913505
    }
290
}
291

            
292
impl Drop for Storage {
293
2932
    fn drop(&mut self) {
294
2932
        if self.enable_performance_metrics {
295
            println!(
296
                "There were {} insertions into the protection set.",
297
                self.protection_set.borrow().number_of_insertions()
298
            );
299
            println!(
300
                "There were at most {} root variables.",
301
                self.protection_set.borrow().maximum_size()
302
            );
303
            println!("There were at most {} nodes.", self.nodes.capacity());
304
2932
        }
305
2932
    }
306
}
307

            
308
/// Mark all LDDs reachable from the given root index.
309
///
310
/// Reuses the stack for the depth-first exploration.
311
840
fn mark_node(nodes: &mut IndexedSet<Node>, stack: &mut Vec<LddIndex>, root: LddIndex) {
312
840
    stack.push(root);
313
19994
    while let Some(current) = stack.pop() {
314
19154
        let node = &mut nodes[current];
315
19154
        debug_assert!(node.is_valid(), "Should never mark a node that is not valid.");
316
19154
        if node.marked {
317
9517
            continue;
318
        } else {
319
9637
            node.marked = true;
320
9637
            if *current != 0 && *current != 1 {
321
9157
                stack.push(node.down);
322
9157
                stack.push(node.right);
323
9157
            }
324
        }
325
    }
326

            
327
840
    debug_assert!(stack.is_empty(), "When marking finishes the stack should be empty.");
328
840
}
329

            
330
#[cfg(test)]
331
mod tests {
332

            
333
    use super::*;
334
    use crate::operations::singleton;
335
    use crate::test_utility::*;
336

            
337
    use merc_utilities::random_test;
338

            
339
    #[test]
340
1
    fn test_random_garbage_collection_small() {
341
100
        random_test(100, |rng| {
342
100
            let mut storage = Storage::new();
343

            
344
            let _child: Ldd;
345
100
            {
346
100
                // Make sure that this set goes out of scope, but keep a reference to some child ldd.
347
100
                let vector = random_vector(rng, 10, 10);
348
100
                let ldd = singleton(&mut storage, &vector);
349
100

            
350
100
                _child = storage.get(&ldd).1;
351
100
                storage.garbage_collect();
352
100
            }
353

            
354
100
            storage.garbage_collect();
355
100
        });
356
1
    }
357

            
358
    #[test]
359
    #[cfg_attr(miri, ignore)]
360
1
    fn test_random_garbage_collection() {
361
20
        random_test(20, |rng| {
362
20
            let mut storage = Storage::new();
363

            
364
            let _child: Ldd;
365
20
            {
366
20
                // Make sure that this set goes out of scope, but keep a reference to some child ldd.
367
20
                let vector = random_vector_set(rng, 2000, 10, 2);
368
20
                let ldd = from_iter(&mut storage, vector.iter());
369
20

            
370
20
                _child = storage.get(&storage.get(&ldd).1).1;
371
20
                storage.garbage_collect();
372
20
            }
373

            
374
20
            storage.garbage_collect();
375
20
        });
376
1
    }
377
}