Auto merge of #64545 - nnethercote:ObligForest-more, r=nmatsakis

More `ObligationForest` improvements

Following on from #64500, these commits alsomake the code both nicer and faster.

r? @nikomatsakis
diff --git a/src/librustc/infer/mod.rs b/src/librustc/infer/mod.rs
index a886c44..c5712cc 100644
--- a/src/librustc/infer/mod.rs
+++ b/src/librustc/infer/mod.rs
@@ -1562,11 +1562,7 @@
         ShallowResolver { infcx }
     }
 
-    // We have this force-inlined variant of `shallow_resolve` for the one
-    // callsite that is extremely hot. All other callsites use the normal
-    // variant.
-    #[inline(always)]
-    pub fn inlined_shallow_resolve(&mut self, typ: Ty<'tcx>) -> Ty<'tcx> {
+    pub fn shallow_resolve(&mut self, typ: Ty<'tcx>) -> Ty<'tcx> {
         match typ.sty {
             ty::Infer(ty::TyVar(v)) => {
                 // Not entirely obvious: if `typ` is a type variable,
@@ -1601,6 +1597,42 @@
             _ => typ,
         }
     }
+
+    // `resolver.shallow_resolve_changed(ty)` is equivalent to
+    // `resolver.shallow_resolve(ty) != ty`, but more efficient. It's always
+    // inlined, despite being large, because it has a single call site that is
+    // extremely hot.
+    #[inline(always)]
+    pub fn shallow_resolve_changed(&mut self, typ: Ty<'tcx>) -> bool {
+        match typ.sty {
+            ty::Infer(ty::TyVar(v)) => {
+                use self::type_variable::TypeVariableValue;
+
+                // See the comment in `shallow_resolve()`.
+                match self.infcx.type_variables.borrow_mut().probe(v) {
+                    TypeVariableValue::Known { value: t } => self.fold_ty(t) != typ,
+                    TypeVariableValue::Unknown { .. } => false,
+                }
+            }
+
+            ty::Infer(ty::IntVar(v)) => {
+                match self.infcx.int_unification_table.borrow_mut().probe_value(v) {
+                    Some(v) => v.to_type(self.infcx.tcx) != typ,
+                    None => false,
+                }
+            }
+
+            ty::Infer(ty::FloatVar(v)) => {
+                match self.infcx.float_unification_table.borrow_mut().probe_value(v) {
+                    Some(v) => v.to_type(self.infcx.tcx) != typ,
+                    None => false,
+                }
+            }
+
+            _ => false,
+        }
+    }
+
 }
 
 impl<'a, 'tcx> TypeFolder<'tcx> for ShallowResolver<'a, 'tcx> {
@@ -1609,7 +1641,7 @@
     }
 
     fn fold_ty(&mut self, ty: Ty<'tcx>) -> Ty<'tcx> {
-        self.inlined_shallow_resolve(ty)
+        self.shallow_resolve(ty)
     }
 
     fn fold_const(&mut self, ct: &'tcx ty::Const<'tcx>) -> &'tcx ty::Const<'tcx> {
diff --git a/src/librustc/traits/fulfill.rs b/src/librustc/traits/fulfill.rs
index 4494c03..805727b 100644
--- a/src/librustc/traits/fulfill.rs
+++ b/src/librustc/traits/fulfill.rs
@@ -256,15 +256,20 @@
         &mut self,
         pending_obligation: &mut Self::Obligation,
     ) -> ProcessResult<Self::Obligation, Self::Error> {
-        // if we were stalled on some unresolved variables, first check
+        // If we were stalled on some unresolved variables, first check
         // whether any of them have been resolved; if not, don't bother
         // doing more work yet
         if !pending_obligation.stalled_on.is_empty() {
-            if pending_obligation.stalled_on.iter().all(|&ty| {
-                // Use the force-inlined variant of shallow_resolve() because this code is hot.
-                let resolved = ShallowResolver::new(self.selcx.infcx()).inlined_shallow_resolve(ty);
-                resolved == ty // nothing changed here
-            }) {
+            let mut changed = false;
+            // This `for` loop was once a call to `all()`, but this lower-level
+            // form was a perf win. See #64545 for details.
+            for &ty in &pending_obligation.stalled_on {
+                if ShallowResolver::new(self.selcx.infcx()).shallow_resolve_changed(ty) {
+                    changed = true;
+                    break;
+                }
+            }
+            if !changed {
                 debug!("process_predicate: pending obligation {:?} still stalled on {:?}",
                        self.selcx.infcx()
                            .resolve_vars_if_possible(&pending_obligation.obligation),
diff --git a/src/librustc_data_structures/obligation_forest/graphviz.rs b/src/librustc_data_structures/obligation_forest/graphviz.rs
index b2120b1..ddf89d9 100644
--- a/src/librustc_data_structures/obligation_forest/graphviz.rs
+++ b/src/librustc_data_structures/obligation_forest/graphviz.rs
@@ -74,9 +74,7 @@
             .flat_map(|i| {
                 let node = &self.nodes[i];
 
-                node.parent.iter()
-                    .chain(node.dependents.iter())
-                    .map(move |p| (p.index(), i))
+                node.dependents.iter().map(move |&d| (d, i))
             })
             .collect()
     }
diff --git a/src/librustc_data_structures/obligation_forest/mod.rs b/src/librustc_data_structures/obligation_forest/mod.rs
index 189506b..98ae1a5 100644
--- a/src/librustc_data_structures/obligation_forest/mod.rs
+++ b/src/librustc_data_structures/obligation_forest/mod.rs
@@ -73,8 +73,6 @@
 //! aren't needed anymore.
 
 use crate::fx::{FxHashMap, FxHashSet};
-use crate::indexed_vec::Idx;
-use crate::newtype_index;
 
 use std::cell::{Cell, RefCell};
 use std::collections::hash_map::Entry;
@@ -87,10 +85,6 @@
 #[cfg(test)]
 mod tests;
 
-newtype_index! {
-    pub struct NodeIndex { .. }
-}
-
 pub trait ForestObligation : Clone + Debug {
     type Predicate : Clone + hash::Hash + Eq + Debug;
 
@@ -143,9 +137,10 @@
     /// At the end of processing, those nodes will be removed by a
     /// call to `compress`.
     ///
-    /// Ideally, this would be an `IndexVec<NodeIndex, Node<O>>`. But that is
-    /// slower, because this vector is accessed so often that the
-    /// `u32`-to-`usize` conversions required for accesses are significant.
+    /// `usize` indices are used here and throughout this module, rather than
+    /// `newtype_index!` indices, because this code is hot enough that the
+    /// `u32`-to-`usize` conversions that would be required are significant,
+    /// and space considerations are not important.
     nodes: Vec<Node<O>>,
 
     /// A cache of predicates that have been successfully completed.
@@ -154,7 +149,7 @@
     /// A cache of the nodes in `nodes`, indexed by predicate. Unfortunately,
     /// its contents are not guaranteed to match those of `nodes`. See the
     /// comments in `process_obligation` for details.
-    waiting_cache: FxHashMap<O::Predicate, NodeIndex>,
+    waiting_cache: FxHashMap<O::Predicate, usize>,
 
     /// A scratch vector reused in various operations, to avoid allocating new
     /// vectors.
@@ -177,20 +172,16 @@
     obligation: O,
     state: Cell<NodeState>,
 
-    /// The parent of a node - the original obligation of which it is a
-    /// subobligation. Except for error reporting, it is just like any member
-    /// of `dependents`.
-    ///
-    /// Unlike `ObligationForest::nodes`, this uses `NodeIndex` rather than
-    /// `usize` for the index, because keeping the size down is more important
-    /// than the cost of converting to a `usize` for indexing.
-    parent: Option<NodeIndex>,
-
     /// Obligations that depend on this obligation for their completion. They
     /// must all be in a non-pending state.
-    ///
-    /// This uses `NodeIndex` for the same reason as `parent`.
-    dependents: Vec<NodeIndex>,
+    dependents: Vec<usize>,
+
+    /// If true, dependents[0] points to a "parent" node, which requires
+    /// special treatment upon error but is otherwise treated the same.
+    /// (It would be more idiomatic to store the parent node in a separate
+    /// `Option<usize>` field, but that slows down the common case of
+    /// iterating over the parent and other descendants together.)
+    has_parent: bool,
 
     /// Identifier of the obligation tree to which this node belongs.
     obligation_tree_id: ObligationTreeId,
@@ -198,15 +189,20 @@
 
 impl<O> Node<O> {
     fn new(
-        parent: Option<NodeIndex>,
+        parent: Option<usize>,
         obligation: O,
         obligation_tree_id: ObligationTreeId
     ) -> Node<O> {
         Node {
             obligation,
             state: Cell::new(NodeState::Pending),
-            parent,
-            dependents: vec![],
+            dependents:
+                if let Some(parent_index) = parent {
+                    vec![parent_index]
+                } else {
+                    vec![]
+                },
+            has_parent: parent.is_some(),
             obligation_tree_id,
         }
     }
@@ -302,9 +298,7 @@
     }
 
     // Returns Err(()) if we already know this obligation failed.
-    fn register_obligation_at(&mut self, obligation: O, parent: Option<NodeIndex>)
-                              -> Result<(), ()>
-    {
+    fn register_obligation_at(&mut self, obligation: O, parent: Option<usize>) -> Result<(), ()> {
         if self.done_cache.contains(obligation.as_predicate()) {
             return Ok(());
         }
@@ -313,15 +307,13 @@
             Entry::Occupied(o) => {
                 debug!("register_obligation_at({:?}, {:?}) - duplicate of {:?}!",
                        obligation, parent, o.get());
-                let node = &mut self.nodes[o.get().index()];
+                let node = &mut self.nodes[*o.get()];
                 if let Some(parent_index) = parent {
-                    // If the node is already in `waiting_cache`, it's already
-                    // been marked with a parent. (It's possible that parent
-                    // has been cleared by `apply_rewrites`, though.) So just
-                    // dump `parent` into `node.dependents`... unless it's
-                    // already in `node.dependents` or `node.parent`.
-                    if !node.dependents.contains(&parent_index) &&
-                       Some(parent_index) != node.parent {
+                    // If the node is already in `waiting_cache`, it has
+                    // already had its chance to be marked with a parent. So if
+                    // it's not already present, just dump `parent` into the
+                    // dependents as a non-parent.
+                    if !node.dependents.contains(&parent_index) {
                         node.dependents.push(parent_index);
                     }
                 }
@@ -336,10 +328,8 @@
                        obligation, parent, self.nodes.len());
 
                 let obligation_tree_id = match parent {
-                    Some(parent_index) => {
-                        self.nodes[parent_index.index()].obligation_tree_id
-                    }
-                    None => self.obligation_tree_id_generator.next().unwrap()
+                    Some(parent_index) => self.nodes[parent_index].obligation_tree_id,
+                    None => self.obligation_tree_id_generator.next().unwrap(),
                 };
 
                 let already_failed =
@@ -352,7 +342,7 @@
                 if already_failed {
                     Err(())
                 } else {
-                    v.insert(NodeIndex::new(self.nodes.len()));
+                    v.insert(self.nodes.len());
                     self.nodes.push(Node::new(parent, obligation, obligation_tree_id));
                     Ok(())
                 }
@@ -363,9 +353,9 @@
     /// Converts all remaining obligations to the given error.
     pub fn to_errors<E: Clone>(&mut self, error: E) -> Vec<Error<O, E>> {
         let mut errors = vec![];
-        for (i, node) in self.nodes.iter().enumerate() {
+        for (index, node) in self.nodes.iter().enumerate() {
             if let NodeState::Pending = node.state.get() {
-                let backtrace = self.error_at(i);
+                let backtrace = self.error_at(index);
                 errors.push(Error {
                     error: error.clone(),
                     backtrace,
@@ -409,10 +399,10 @@
         let mut errors = vec![];
         let mut stalled = true;
 
-        for i in 0..self.nodes.len() {
-            let node = &mut self.nodes[i];
+        for index in 0..self.nodes.len() {
+            let node = &mut self.nodes[index];
 
-            debug!("process_obligations: node {} == {:?}", i, node);
+            debug!("process_obligations: node {} == {:?}", index, node);
 
             // `processor.process_obligation` can modify the predicate within
             // `node.obligation`, and that predicate is the key used for
@@ -424,7 +414,7 @@
                 _ => continue
             };
 
-            debug!("process_obligations: node {} got result {:?}", i, result);
+            debug!("process_obligations: node {} got result {:?}", index, result);
 
             match result {
                 ProcessResult::Unchanged => {
@@ -438,18 +428,18 @@
                     for child in children {
                         let st = self.register_obligation_at(
                             child,
-                            Some(NodeIndex::new(i))
+                            Some(index)
                         );
                         if let Err(()) = st {
                             // Error already reported - propagate it
                             // to our node.
-                            self.error_at(i);
+                            self.error_at(index);
                         }
                     }
                 }
                 ProcessResult::Error(err) => {
                     stalled = false;
-                    let backtrace = self.error_at(i);
+                    let backtrace = self.error_at(index);
                     errors.push(Error {
                         error: err,
                         backtrace,
@@ -493,14 +483,14 @@
 
         debug!("process_cycles()");
 
-        for (i, node) in self.nodes.iter().enumerate() {
+        for (index, node) in self.nodes.iter().enumerate() {
             // For rustc-benchmarks/inflate-0.1.0 this state test is extremely
             // hot and the state is almost always `Pending` or `Waiting`. It's
             // a win to handle the no-op cases immediately to avoid the cost of
             // the function call.
             match node.state.get() {
                 NodeState::Waiting | NodeState::Pending | NodeState::Done | NodeState::Error => {},
-                _ => self.find_cycles_from_node(&mut stack, processor, i),
+                _ => self.find_cycles_from_node(&mut stack, processor, index),
             }
         }
 
@@ -510,21 +500,21 @@
         self.scratch.replace(stack);
     }
 
-    fn find_cycles_from_node<P>(&self, stack: &mut Vec<usize>, processor: &mut P, i: usize)
+    fn find_cycles_from_node<P>(&self, stack: &mut Vec<usize>, processor: &mut P, index: usize)
         where P: ObligationProcessor<Obligation=O>
     {
-        let node = &self.nodes[i];
+        let node = &self.nodes[index];
         match node.state.get() {
             NodeState::OnDfsStack => {
-                let i = stack.iter().rposition(|n| *n == i).unwrap();
-                processor.process_backedge(stack[i..].iter().map(GetObligation(&self.nodes)),
+                let index = stack.iter().rposition(|&n| n == index).unwrap();
+                processor.process_backedge(stack[index..].iter().map(GetObligation(&self.nodes)),
                                            PhantomData);
             }
             NodeState::Success => {
                 node.state.set(NodeState::OnDfsStack);
-                stack.push(i);
-                for index in node.parent.iter().chain(node.dependents.iter()) {
-                    self.find_cycles_from_node(stack, processor, index.index());
+                stack.push(index);
+                for &index in node.dependents.iter() {
+                    self.find_cycles_from_node(stack, processor, index);
                 }
                 stack.pop();
                 node.state.set(NodeState::Done);
@@ -541,33 +531,34 @@
 
     /// Returns a vector of obligations for `p` and all of its
     /// ancestors, putting them into the error state in the process.
-    fn error_at(&self, mut i: usize) -> Vec<O> {
+    fn error_at(&self, mut index: usize) -> Vec<O> {
         let mut error_stack = self.scratch.replace(vec![]);
         let mut trace = vec![];
 
         loop {
-            let node = &self.nodes[i];
+            let node = &self.nodes[index];
             node.state.set(NodeState::Error);
             trace.push(node.obligation.clone());
-            error_stack.extend(node.dependents.iter().map(|index| index.index()));
-
-            // Loop to the parent.
-            match node.parent {
-                Some(parent_index) => i = parent_index.index(),
-                None => break
+            if node.has_parent {
+                // The first dependent is the parent, which is treated
+                // specially.
+                error_stack.extend(node.dependents.iter().skip(1));
+                index = node.dependents[0];
+            } else {
+                // No parent; treat all dependents non-specially.
+                error_stack.extend(node.dependents.iter());
+                break;
             }
         }
 
-        while let Some(i) = error_stack.pop() {
-            let node = &self.nodes[i];
+        while let Some(index) = error_stack.pop() {
+            let node = &self.nodes[index];
             match node.state.get() {
                 NodeState::Error => continue,
                 _ => node.state.set(NodeState::Error),
             }
 
-            error_stack.extend(
-                node.parent.iter().chain(node.dependents.iter()).map(|index| index.index())
-            );
+            error_stack.extend(node.dependents.iter());
         }
 
         self.scratch.replace(error_stack);
@@ -577,8 +568,8 @@
     // This always-inlined function is for the hot call site.
     #[inline(always)]
     fn inlined_mark_neighbors_as_waiting_from(&self, node: &Node<O>) {
-        for dependent in node.parent.iter().chain(node.dependents.iter()) {
-            self.mark_as_waiting_from(&self.nodes[dependent.index()]);
+        for &index in node.dependents.iter() {
+            self.mark_as_waiting_from(&self.nodes[index]);
         }
     }
 
@@ -631,16 +622,16 @@
         // Now move all popped nodes to the end. Try to keep the order.
         //
         // LOOP INVARIANT:
-        //     self.nodes[0..i - dead_nodes] are the first remaining nodes
-        //     self.nodes[i - dead_nodes..i] are all dead
-        //     self.nodes[i..] are unchanged
-        for i in 0..self.nodes.len() {
-            let node = &self.nodes[i];
+        //     self.nodes[0..index - dead_nodes] are the first remaining nodes
+        //     self.nodes[index - dead_nodes..index] are all dead
+        //     self.nodes[index..] are unchanged
+        for index in 0..self.nodes.len() {
+            let node = &self.nodes[index];
             match node.state.get() {
                 NodeState::Pending | NodeState::Waiting => {
                     if dead_nodes > 0 {
-                        self.nodes.swap(i, i - dead_nodes);
-                        node_rewrites[i] -= dead_nodes;
+                        self.nodes.swap(index, index - dead_nodes);
+                        node_rewrites[index] -= dead_nodes;
                     }
                 }
                 NodeState::Done => {
@@ -655,7 +646,7 @@
                     } else {
                         self.done_cache.insert(node.obligation.as_predicate().clone());
                     }
-                    node_rewrites[i] = nodes_len;
+                    node_rewrites[index] = nodes_len;
                     dead_nodes += 1;
                 }
                 NodeState::Error => {
@@ -663,9 +654,9 @@
                     // tests must come up with a different type on every type error they
                     // check against.
                     self.waiting_cache.remove(node.obligation.as_predicate());
-                    node_rewrites[i] = nodes_len;
+                    node_rewrites[index] = nodes_len;
                     dead_nodes += 1;
-                    self.insert_into_error_cache(i);
+                    self.insert_into_error_cache(index);
                 }
                 NodeState::OnDfsStack | NodeState::Success => unreachable!()
             }
@@ -706,23 +697,18 @@
         let nodes_len = node_rewrites.len();
 
         for node in &mut self.nodes {
-            if let Some(index) = node.parent {
-                let new_i = node_rewrites[index.index()];
-                if new_i >= nodes_len {
-                    node.parent = None;
+            let mut index = 0;
+            while index < node.dependents.len() {
+                let new_index = node_rewrites[node.dependents[index]];
+                if new_index >= nodes_len {
+                    node.dependents.swap_remove(index);
+                    if index == 0 && node.has_parent {
+                        // We just removed the parent.
+                        node.has_parent = false;
+                    }
                 } else {
-                    node.parent = Some(NodeIndex::new(new_i));
-                }
-            }
-
-            let mut i = 0;
-            while i < node.dependents.len() {
-                let new_i = node_rewrites[node.dependents[i].index()];
-                if new_i >= nodes_len {
-                    node.dependents.swap_remove(i);
-                } else {
-                    node.dependents[i] = NodeIndex::new(new_i);
-                    i += 1;
+                    node.dependents[index] = new_index;
+                    index += 1;
                 }
             }
         }
@@ -730,11 +716,11 @@
         // This updating of `self.waiting_cache` is necessary because the
         // removal of nodes within `compress` can fail. See above.
         self.waiting_cache.retain(|_predicate, index| {
-            let new_i = node_rewrites[index.index()];
-            if new_i >= nodes_len {
+            let new_index = node_rewrites[*index];
+            if new_index >= nodes_len {
                 false
             } else {
-                *index = NodeIndex::new(new_i);
+                *index = new_index;
                 true
             }
         });