blob: 5021e5e8fc090d6ad2d8b6a5cdbe3e5f1a7207db [file] [log] [blame]
//! Routine to compute the strongly connected components (SCCs) of a graph.
//!
//! Also computes as the resulting DAG if each SCC is replaced with a
//! node in the graph. This uses [Tarjan's algorithm](
//! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
//! that completes in *O*(*n*) time.
use crate::fx::FxHashSet;
use crate::graph::vec_graph::VecGraph;
use crate::graph::{DirectedGraph, NumEdges, Successors};
use rustc_index::{Idx, IndexSlice, IndexVec};
use std::ops::Range;
#[cfg(test)]
mod tests;
/// Strongly connected components (SCC) of a graph. The type `N` is
/// the index type for the graph nodes and `S` is the index type for
/// the SCCs. We can map from each node to the SCC that it
/// participates in, and we also have the successors of each SCC.
pub struct Sccs<N: Idx, S: Idx> {
/// For each node, what is the SCC index of the SCC to which it
/// belongs.
scc_indices: IndexVec<N, S>,
/// Data about each SCC.
scc_data: SccData<S>,
}
pub struct SccData<S: Idx> {
/// For each SCC, the range of `all_successors` where its
/// successors can be found.
ranges: IndexVec<S, Range<usize>>,
/// Contains the successors for all the Sccs, concatenated. The
/// range of indices corresponding to a given SCC is found in its
/// SccData.
all_successors: Vec<S>,
}
impl<N: Idx, S: Idx + Ord> Sccs<N, S> {
pub fn new(graph: &(impl DirectedGraph<Node = N> + Successors)) -> Self {
SccsConstruction::construct(graph)
}
pub fn scc_indices(&self) -> &IndexSlice<N, S> {
&self.scc_indices
}
pub fn scc_data(&self) -> &SccData<S> {
&self.scc_data
}
/// Returns the number of SCCs in the graph.
pub fn num_sccs(&self) -> usize {
self.scc_data.len()
}
/// Returns an iterator over the SCCs in the graph.
///
/// The SCCs will be iterated in **dependency order** (or **post order**),
/// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
/// This is convenient when the edges represent dependencies: when you visit
/// `S1`, the value for `S2` will already have been computed.
pub fn all_sccs(&self) -> impl Iterator<Item = S> {
(0..self.scc_data.len()).map(S::new)
}
/// Returns the SCC to which a node `r` belongs.
pub fn scc(&self, r: N) -> S {
self.scc_indices[r]
}
/// Returns the successors of the given SCC.
pub fn successors(&self, scc: S) -> &[S] {
self.scc_data.successors(scc)
}
/// Construct the reverse graph of the SCC graph.
pub fn reverse(&self) -> VecGraph<S> {
VecGraph::new(
self.num_sccs(),
self.all_sccs()
.flat_map(|source| {
self.successors(source).iter().map(move |&target| (target, source))
})
.collect(),
)
}
}
impl<N: Idx, S: Idx + Ord> DirectedGraph for Sccs<N, S> {
type Node = S;
fn num_nodes(&self) -> usize {
self.num_sccs()
}
}
impl<N: Idx, S: Idx + Ord> NumEdges for Sccs<N, S> {
fn num_edges(&self) -> usize {
self.scc_data.all_successors.len()
}
}
impl<N: Idx, S: Idx + Ord> Successors for Sccs<N, S> {
fn successors(&self, node: S) -> impl Iterator<Item = Self::Node> {
self.successors(node).iter().cloned()
}
}
impl<S: Idx> SccData<S> {
/// Number of SCCs,
fn len(&self) -> usize {
self.ranges.len()
}
pub fn ranges(&self) -> &IndexSlice<S, Range<usize>> {
&self.ranges
}
pub fn all_successors(&self) -> &Vec<S> {
&self.all_successors
}
/// Returns the successors of the given SCC.
fn successors(&self, scc: S) -> &[S] {
// Annoyingly, `range` does not implement `Copy`, so we have
// to do `range.start..range.end`:
let range = &self.ranges[scc];
&self.all_successors[range.start..range.end]
}
/// Creates a new SCC with `successors` as its successors and
/// returns the resulting index.
fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
// Store the successors on `scc_successors_vec`, remembering
// the range of indices.
let all_successors_start = self.all_successors.len();
self.all_successors.extend(successors);
let all_successors_end = self.all_successors.len();
debug!(
"create_scc({:?}) successors={:?}",
self.ranges.len(),
&self.all_successors[all_successors_start..all_successors_end],
);
self.ranges.push(all_successors_start..all_successors_end)
}
}
struct SccsConstruction<'c, G: DirectedGraph + Successors, S: Idx> {
graph: &'c G,
/// The state of each node; used during walk to record the stack
/// and after walk to record what cycle each node ended up being
/// in.
node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
/// The stack of nodes that we are visiting as part of the DFS.
node_stack: Vec<G::Node>,
/// The stack of successors: as we visit a node, we mark our
/// position in this stack, and when we encounter a successor SCC,
/// we push it on the stack. When we complete an SCC, we can pop
/// everything off the stack that was found along the way.
successors_stack: Vec<S>,
/// A set used to strip duplicates. As we accumulate successors
/// into the successors_stack, we sometimes get duplicate entries.
/// We use this set to remove those -- we also keep its storage
/// around between successors to amortize memory allocation costs.
duplicate_set: FxHashSet<S>,
scc_data: SccData<S>,
}
#[derive(Copy, Clone, Debug)]
enum NodeState<N, S> {
/// This node has not yet been visited as part of the DFS.
///
/// After SCC construction is complete, this state ought to be
/// impossible.
NotVisited,
/// This node is currently being walk as part of our DFS. It is on
/// the stack at the depth `depth`.
///
/// After SCC construction is complete, this state ought to be
/// impossible.
BeingVisited { depth: usize },
/// Indicates that this node is a member of the given cycle.
InCycle { scc_index: S },
/// Indicates that this node is a member of whatever cycle
/// `parent` is a member of. This state is transient: whenever we
/// see it, we try to overwrite it with the current state of
/// `parent` (this is the "path compression" step of a union-find
/// algorithm).
InCycleWith { parent: N },
}
#[derive(Copy, Clone, Debug)]
enum WalkReturn<S> {
Cycle { min_depth: usize },
Complete { scc_index: S },
}
impl<'c, G, S> SccsConstruction<'c, G, S>
where
G: DirectedGraph + Successors,
S: Idx,
{
/// Identifies SCCs in the graph `G` and computes the resulting
/// DAG. This uses a variant of [Tarjan's
/// algorithm][wikipedia]. The high-level summary of the algorithm
/// is that we do a depth-first search. Along the way, we keep a
/// stack of each node whose successors are being visited. We
/// track the depth of each node on this stack (there is no depth
/// if the node is not on the stack). When we find that some node
/// N with depth D can reach some other node N' with lower depth
/// D' (i.e., D' < D), we know that N, N', and all nodes in
/// between them on the stack are part of an SCC.
///
/// [wikipedia]: https://bit.ly/2EZIx84
fn construct(graph: &'c G) -> Sccs<G::Node, S> {
let num_nodes = graph.num_nodes();
let mut this = Self {
graph,
node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
node_stack: Vec::with_capacity(num_nodes),
successors_stack: Vec::new(),
scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
duplicate_set: FxHashSet::default(),
};
let scc_indices = (0..num_nodes)
.map(G::Node::new)
.map(|node| match this.start_walk_from(node) {
WalkReturn::Complete { scc_index } => scc_index,
WalkReturn::Cycle { min_depth } => {
panic!("`start_walk_node({node:?})` returned cycle with depth {min_depth:?}")
}
})
.collect();
Sccs { scc_indices, scc_data: this.scc_data }
}
fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> {
if let Some(result) = self.inspect_node(node) {
result
} else {
self.walk_unvisited_node(node)
}
}
/// Inspect a node during the DFS. We first examine its current
/// state -- if it is not yet visited (`NotVisited`), return `None` so
/// that the caller might push it onto the stack and start walking its
/// successors.
///
/// If it is already on the DFS stack it will be in the state
/// `BeingVisited`. In that case, we have found a cycle and we
/// return the depth from the stack.
///
/// Otherwise, we are looking at a node that has already been
/// completely visited. We therefore return `WalkReturn::Complete`
/// with its associated SCC index.
fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> {
Some(match self.find_state(node) {
NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
NodeState::NotVisited => return None,
NodeState::InCycleWith { parent } => panic!(
"`find_state` returned `InCycleWith({parent:?})`, which ought to be impossible"
),
})
}
/// Fetches the state of the node `r`. If `r` is recorded as being
/// in a cycle with some other node `r2`, then fetches the state
/// of `r2` (and updates `r` to reflect current result). This is
/// basically the "find" part of a standard union-find algorithm
/// (with path compression).
fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> {
// To avoid recursion we temporarily reuse the `parent` of each
// InCycleWith link to encode a downwards link while compressing
// the path. After we have found the root or deepest node being
// visited, we traverse the reverse links and correct the node
// states on the way.
//
// **Note**: This mutation requires that this is a leaf function
// or at least that none of the called functions inspects the
// current node states. Luckily, we are a leaf.
// Remember one previous link. The termination condition when
// following links downwards is then simply as soon as we have
// found the initial self-loop.
let mut previous_node = node;
// Ultimately assigned by the parent when following
// `InCycleWith` upwards.
let node_state = loop {
debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]);
match self.node_states[node] {
NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index },
NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth },
NodeState::NotVisited => break NodeState::NotVisited,
NodeState::InCycleWith { parent } => {
// We test this, to be extremely sure that we never
// ever break our termination condition for the
// reverse iteration loop.
assert!(node != parent, "Node can not be in cycle with itself");
// Store the previous node as an inverted list link
self.node_states[node] = NodeState::InCycleWith { parent: previous_node };
// Update to parent node.
previous_node = node;
node = parent;
}
}
};
// The states form a graph where up to one outgoing link is stored at
// each node. Initially in general,
//
// E
// ^
// |
// InCycleWith/BeingVisited/NotVisited
// |
// A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
// |
// = node, previous_node
//
// After the first loop, this will look like
// E
// ^
// |
// InCycleWith/BeingVisited/NotVisited
// |
// +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
// | | | |
// | InCycleWith | = node
// +-+ =previous_node
//
// Note in particular that A will be linked to itself in a self-cycle
// and no other self-cycles occur due to how InCycleWith is assigned in
// the find phase implemented by `walk_unvisited_node`.
//
// We now want to compress the path, that is assign the state of the
// link D-E to all other links.
//
// We can then walk backwards, starting from `previous_node`, and assign
// each node in the list with the updated state. The loop terminates
// when we reach the self-cycle.
// Move backwards until we found the node where we started. We
// will know when we hit the state where previous_node == node.
loop {
// Back at the beginning, we can return.
if previous_node == node {
return node_state;
}
// Update to previous node in the link.
match self.node_states[previous_node] {
NodeState::InCycleWith { parent: previous } => {
node = previous_node;
previous_node = previous;
}
// Only InCycleWith nodes were added to the reverse linked list.
other => panic!("Invalid previous link while compressing cycle: {other:?}"),
}
debug!("find_state: parent_state = {:?}", node_state);
// Update the node state from the parent state. The assigned
// state is actually a loop invariant but it will only be
// evaluated if there is at least one backlink to follow.
// Fully trusting llvm here to find this loop optimization.
match node_state {
// Path compression, make current node point to the same root.
NodeState::InCycle { .. } => {
self.node_states[node] = node_state;
}
// Still visiting nodes, compress to cycle to the node
// at that depth.
NodeState::BeingVisited { depth } => {
self.node_states[node] =
NodeState::InCycleWith { parent: self.node_stack[depth] };
}
// These are never allowed as parent nodes. InCycleWith
// should have been followed to a real parent and
// NotVisited can not be part of a cycle since it should
// have instead gotten explored.
NodeState::NotVisited | NodeState::InCycleWith { .. } => {
panic!("invalid parent state: {node_state:?}")
}
}
}
}
/// Walks a node that has never been visited before.
///
/// Call this method when `inspect_node` has returned `None`. Having the
/// caller decide avoids mutual recursion between the two methods and allows
/// us to maintain an allocated stack for nodes on the path between calls.
#[instrument(skip(self, initial), level = "debug")]
fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> {
struct VisitingNodeFrame<G: DirectedGraph, Successors> {
node: G::Node,
iter: Option<Successors>,
depth: usize,
min_depth: usize,
successors_len: usize,
min_cycle_root: G::Node,
successor_node: G::Node,
}
// Move the stack to a local variable. We want to utilize the existing allocation and
// mutably borrow it without borrowing self at the same time.
let mut successors_stack = core::mem::take(&mut self.successors_stack);
debug_assert_eq!(successors_stack.len(), 0);
let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame {
node: initial,
depth: 0,
min_depth: 0,
iter: None,
successors_len: 0,
min_cycle_root: initial,
successor_node: initial,
}];
let mut return_value = None;
'recurse: while let Some(frame) = stack.last_mut() {
let VisitingNodeFrame {
node,
depth,
iter,
successors_len,
min_depth,
min_cycle_root,
successor_node,
} = frame;
let node = *node;
let depth = *depth;
let successors = match iter {
Some(iter) => iter,
None => {
// This None marks that we still have the initialize this node's frame.
debug!(?depth, ?node);
debug_assert!(matches!(self.node_states[node], NodeState::NotVisited));
// Push `node` onto the stack.
self.node_states[node] = NodeState::BeingVisited { depth };
self.node_stack.push(node);
// Walk each successor of the node, looking to see if any of
// them can reach a node that is presently on the stack. If
// so, that means they can also reach us.
*successors_len = successors_stack.len();
// Set and return a reference, this is currently empty.
iter.get_or_insert(self.graph.successors(node))
}
};
// Now that iter is initialized, this is a constant for this frame.
let successors_len = *successors_len;
// Construct iterators for the nodes and walk results. There are two cases:
// * The walk of a successor node returned.
// * The remaining successor nodes.
let returned_walk =
return_value.take().into_iter().map(|walk| (*successor_node, Some(walk)));
let successor_walk = successors.map(|successor_node| {
debug!(?node, ?successor_node);
(successor_node, self.inspect_node(successor_node))
});
for (successor_node, walk) in returned_walk.chain(successor_walk) {
match walk {
Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => {
// Track the minimum depth we can reach.
assert!(successor_min_depth <= depth);
if successor_min_depth < *min_depth {
debug!(?node, ?successor_min_depth);
*min_depth = successor_min_depth;
*min_cycle_root = successor_node;
}
}
Some(WalkReturn::Complete { scc_index: successor_scc_index }) => {
// Push the completed SCC indices onto
// the `successors_stack` for later.
debug!(?node, ?successor_scc_index);
successors_stack.push(successor_scc_index);
}
None => {
let depth = depth + 1;
debug!(?depth, ?successor_node);
// Remember which node the return value will come from.
frame.successor_node = successor_node;
// Start a new stack frame the step into it.
stack.push(VisitingNodeFrame {
node: successor_node,
depth,
iter: None,
successors_len: 0,
min_depth: depth,
min_cycle_root: successor_node,
successor_node,
});
continue 'recurse;
}
}
}
// Completed walk, remove `node` from the stack.
let r = self.node_stack.pop();
debug_assert_eq!(r, Some(node));
// Remove the frame, it's done.
let frame = stack.pop().unwrap();
// If `min_depth == depth`, then we are the root of the
// cycle: we can't reach anyone further down the stack.
// Pass the 'return value' down the stack.
// We return one frame at a time so there can't be another return value.
debug_assert!(return_value.is_none());
return_value = Some(if frame.min_depth == depth {
// Note that successor stack may have duplicates, so we
// want to remove those:
let deduplicated_successors = {
let duplicate_set = &mut self.duplicate_set;
duplicate_set.clear();
successors_stack
.drain(successors_len..)
.filter(move |&i| duplicate_set.insert(i))
};
let scc_index = self.scc_data.create_scc(deduplicated_successors);
self.node_states[node] = NodeState::InCycle { scc_index };
WalkReturn::Complete { scc_index }
} else {
// We are not the head of the cycle. Return back to our
// caller. They will take ownership of the
// `self.successors` data that we pushed.
self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root };
WalkReturn::Cycle { min_depth: frame.min_depth }
});
}
// Keep the allocation we used for successors_stack.
self.successors_stack = successors_stack;
debug_assert_eq!(self.successors_stack.len(), 0);
return_value.unwrap()
}
}