tracing_mutex/graph.rs
1use std::cell::Cell;
2use std::collections::hash_map::Entry;
3use std::collections::HashMap;
4use std::collections::HashSet;
5use std::hash::Hash;
6
7type Order = usize;
8
9/// Directed Graph with dynamic topological sorting
10///
11/// Design and implementation based "A Dynamic Topological Sort Algorithm for Directed Acyclic
12/// Graphs" by David J. Pearce and Paul H.J. Kelly which can be found on [the author's
13/// website][paper].
14///
15/// Variable- and method names have been chosen to reflect most closely resemble the names in the
16/// original paper.
17///
18/// This digraph tracks its own topological order and updates it when new edges are added to the
19/// graph. If a cycle is added that would create a cycle, that edge is rejected and the graph is not
20/// visibly changed.
21///
22/// [paper]: https://whileydave.com/publications/pk07_jea/
23#[derive(Debug)]
24pub struct DiGraph<V, E>
25where
26 V: Eq + Hash + Copy,
27{
28 nodes: HashMap<V, Node<V, E>>,
29 // Instead of reordering the orders in the graph whenever a node is deleted, we maintain a list
30 // of unused ids that can be handed out later again.
31 unused_order: Vec<Order>,
32}
33
34#[derive(Debug)]
35struct Node<V, E>
36where
37 V: Eq + Hash + Clone,
38{
39 in_edges: HashSet<V>,
40 out_edges: HashMap<V, E>,
41 // The "Ord" field is a Cell to ensure we can update it in an immutable context.
42 // `std::collections::HashMap` doesn't let you have multiple mutable references to elements, but
43 // this way we can use immutable references and still update `ord`. This saves quite a few
44 // hashmap lookups in the final reorder function.
45 ord: Cell<Order>,
46}
47
48impl<V, E> DiGraph<V, E>
49where
50 V: Eq + Hash + Copy,
51{
52 /// Add a new node to the graph.
53 ///
54 /// If the node already existed, this function does not add it and uses the existing node data.
55 /// The function returns mutable references to the in-edges, out-edges, and finally the index of
56 /// the node in the topological order.
57 ///
58 /// New nodes are appended to the end of the topological order when added.
59 fn add_node(&mut self, n: V) -> (&mut HashSet<V>, &mut HashMap<V, E>, Order) {
60 // need to compute next id before the call to entry() to avoid duplicate borrow of nodes
61 let fallback_id = self.nodes.len();
62
63 let node = self.nodes.entry(n).or_insert_with(|| {
64 let order = if let Some(id) = self.unused_order.pop() {
65 // Reuse discarded ordering entry
66 id
67 } else {
68 // Allocate new order id
69 fallback_id
70 };
71
72 Node {
73 ord: Cell::new(order),
74 in_edges: Default::default(),
75 out_edges: Default::default(),
76 }
77 });
78
79 (&mut node.in_edges, &mut node.out_edges, node.ord.get())
80 }
81
82 pub(crate) fn remove_node(&mut self, n: V) -> bool {
83 match self.nodes.remove(&n) {
84 None => false,
85 Some(Node {
86 out_edges,
87 in_edges,
88 ord,
89 }) => {
90 // Return ordering to the pool of unused ones
91 self.unused_order.push(ord.get());
92
93 out_edges.into_keys().for_each(|m| {
94 self.nodes.get_mut(&m).unwrap().in_edges.remove(&n);
95 });
96
97 in_edges.into_iter().for_each(|m| {
98 self.nodes.get_mut(&m).unwrap().out_edges.remove(&n);
99 });
100
101 true
102 }
103 }
104 }
105
106 /// Attempt to add an edge to the graph
107 ///
108 /// Nodes, both from and to, are created as needed when creating new edges. If the new edge
109 /// would introduce a cycle, the edge is rejected and `false` is returned.
110 ///
111 /// # Errors
112 ///
113 /// If the edge would introduce the cycle, the underlying graph is not modified and a list of
114 /// all the edge data in the would-be cycle is returned instead.
115 pub(crate) fn add_edge(&mut self, x: V, y: V, e: impl FnOnce() -> E) -> Result<(), Vec<E>>
116 where
117 E: Clone,
118 {
119 if x == y {
120 // self-edges are always considered cycles
121 return Err(Vec::new());
122 }
123
124 let (_, out_edges, ub) = self.add_node(x);
125
126 match out_edges.entry(y) {
127 Entry::Occupied(_) => {
128 // Edge already exists, nothing to be done
129 return Ok(());
130 }
131 Entry::Vacant(entry) => entry.insert(e()),
132 };
133
134 let (in_edges, _, lb) = self.add_node(y);
135
136 in_edges.insert(x);
137
138 if lb < ub {
139 // This edge might introduce a cycle, need to recompute the topological sort
140 let mut visited = [x, y].into_iter().collect();
141 let mut delta_f = Vec::new();
142 let mut delta_b = Vec::new();
143
144 if let Err(cycle) = self.dfs_f(&self.nodes[&y], ub, &mut visited, &mut delta_f) {
145 // This edge introduces a cycle, so we want to reject it and remove it from the
146 // graph again to keep the "does not contain cycles" invariant.
147
148 // We use map instead of unwrap to avoid an `unwrap()` but we know that these
149 // entries are present as we just added them above.
150 self.nodes.get_mut(&y).map(|node| node.in_edges.remove(&x));
151 self.nodes.get_mut(&x).map(|node| node.out_edges.remove(&y));
152
153 // No edge was added
154 return Err(cycle);
155 }
156
157 // No need to check as we should've found the cycle on the forward pass
158 self.dfs_b(&self.nodes[&x], lb, &mut visited, &mut delta_b);
159
160 // Original paper keeps it around but this saves us from clearing it
161 drop(visited);
162
163 self.reorder(delta_f, delta_b);
164 }
165
166 Ok(())
167 }
168
169 /// Forwards depth-first-search
170 fn dfs_f<'a>(
171 &'a self,
172 n: &'a Node<V, E>,
173 ub: Order,
174 visited: &mut HashSet<V>,
175 delta_f: &mut Vec<&'a Node<V, E>>,
176 ) -> Result<(), Vec<E>>
177 where
178 E: Clone,
179 {
180 delta_f.push(n);
181
182 for (w, e) in &n.out_edges {
183 let node = &self.nodes[w];
184 let ord = node.ord.get();
185
186 if ord == ub {
187 // Found a cycle
188 return Err(vec![e.clone()]);
189 } else if !visited.contains(w) && ord < ub {
190 // Need to check recursively
191 visited.insert(*w);
192 if let Err(mut chain) = self.dfs_f(node, ub, visited, delta_f) {
193 chain.push(e.clone());
194 return Err(chain);
195 }
196 }
197 }
198
199 Ok(())
200 }
201
202 /// Backwards depth-first-search
203 fn dfs_b<'a>(
204 &'a self,
205 n: &'a Node<V, E>,
206 lb: Order,
207 visited: &mut HashSet<V>,
208 delta_b: &mut Vec<&'a Node<V, E>>,
209 ) {
210 delta_b.push(n);
211
212 for w in &n.in_edges {
213 let node = &self.nodes[w];
214 if !visited.contains(w) && lb < node.ord.get() {
215 visited.insert(*w);
216
217 self.dfs_b(node, lb, visited, delta_b);
218 }
219 }
220 }
221
222 fn reorder(&self, mut delta_f: Vec<&Node<V, E>>, mut delta_b: Vec<&Node<V, E>>) {
223 self.sort(&mut delta_f);
224 self.sort(&mut delta_b);
225
226 let mut l = Vec::with_capacity(delta_f.len() + delta_b.len());
227 let mut orders = Vec::with_capacity(delta_f.len() + delta_b.len());
228
229 for v in delta_b.into_iter().chain(delta_f) {
230 orders.push(v.ord.get());
231 l.push(v);
232 }
233
234 // Original paper cleverly merges the two lists by using that both are sorted. We just sort
235 // again. This is slower but also much simpler.
236 orders.sort_unstable();
237
238 for (node, order) in l.into_iter().zip(orders) {
239 node.ord.set(order);
240 }
241 }
242
243 fn sort(&self, ids: &mut [&Node<V, E>]) {
244 // Can use unstable sort because mutex ids should not be equal
245 ids.sort_unstable_by_key(|v| &v.ord);
246 }
247}
248
249// Manual `Default` impl as derive causes unnecessarily strong bounds.
250impl<V, E> Default for DiGraph<V, E>
251where
252 V: Eq + Hash + Copy,
253{
254 fn default() -> Self {
255 Self {
256 nodes: Default::default(),
257 unused_order: Default::default(),
258 }
259 }
260}
261
262#[cfg(test)]
263mod tests {
264 use rand::seq::SliceRandom;
265 use rand::thread_rng;
266
267 use super::*;
268
269 fn nop() {}
270
271 #[test]
272 fn test_no_self_cycle() {
273 // Regression test for https://github.com/bertptrs/tracing-mutex/issues/7
274 let mut graph = DiGraph::default();
275
276 assert!(graph.add_edge(1, 1, nop).is_err());
277 }
278
279 #[test]
280 fn test_digraph() {
281 let mut graph = DiGraph::default();
282
283 // Add some safe edges
284 assert!(graph.add_edge(0, 1, nop).is_ok());
285 assert!(graph.add_edge(1, 2, nop).is_ok());
286 assert!(graph.add_edge(2, 3, nop).is_ok());
287 assert!(graph.add_edge(4, 2, nop).is_ok());
288
289 // Try to add an edge that introduces a cycle
290 assert!(graph.add_edge(3, 1, nop).is_err());
291
292 // Add an edge that should reorder 0 to be after 4
293 assert!(graph.add_edge(4, 0, nop).is_ok());
294 }
295
296 /// Fuzz the DiGraph implementation by adding a bunch of valid edges.
297 ///
298 /// This test generates all possible forward edges in a 100-node graph consisting of natural
299 /// numbers, shuffles them, then adds them to the graph. This will always be a valid directed,
300 /// acyclic graph because there is a trivial order (the natural numbers) but because the edges
301 /// are added in a random order the DiGraph will still occasionally need to reorder nodes.
302 #[test]
303 fn fuzz_digraph() {
304 // Note: this fuzzer is quadratic in the number of nodes, so this cannot be too large or it
305 // will slow down the tests too much.
306 const NUM_NODES: usize = 100;
307 let mut edges = Vec::with_capacity(NUM_NODES * NUM_NODES);
308
309 for i in 0..NUM_NODES {
310 for j in i..NUM_NODES {
311 if i != j {
312 edges.push((i, j));
313 }
314 }
315 }
316
317 edges.shuffle(&mut thread_rng());
318
319 let mut graph = DiGraph::default();
320
321 for (x, y) in edges {
322 assert!(graph.add_edge(x, y, nop).is_ok());
323 }
324 }
325}