Add a specialized version of `shallow_resolve()`.

The super-hot call site of `inlined_shallow_resolve()` effectively
computes `r.inlined_shallow_resolve(ty) != ty`. This commit introduces
`shallow_resolve_changed()`, a version of that function specialized for
that particular call pattern. Incredibly, this reduces the instruction
count for `keccak` by 5%.

The commit also renames `inlined_shallow_resolve()` to
`shallow_resolve()` and removes its `#[inline(always)]` annotation,
because that function is no longer nearly as hot.
diff --git a/src/librustc/infer/mod.rs b/src/librustc/infer/mod.rs
index 8638f42..46a364c 100644
--- a/src/librustc/infer/mod.rs
+++ b/src/librustc/infer/mod.rs
@@ -1558,11 +1558,7 @@
         ShallowResolver { infcx }
     }
 
-    // We have this force-inlined variant of `shallow_resolve` for the one
-    // callsite that is extremely hot. All other callsites use the normal
-    // variant.
-    #[inline(always)]
-    pub fn inlined_shallow_resolve(&mut self, typ: Ty<'tcx>) -> Ty<'tcx> {
+    pub fn shallow_resolve(&mut self, typ: Ty<'tcx>) -> Ty<'tcx> {
         match typ.sty {
             ty::Infer(ty::TyVar(v)) => {
                 // Not entirely obvious: if `typ` is a type variable,
@@ -1597,6 +1593,42 @@
             _ => typ,
         }
     }
+
+    // `resolver.shallow_resolve_changed(ty)` is equivalent to
+    // `resolver.shallow_resolve(ty) != ty`, but more efficient. It's always
+    // inlined, despite being large, because it has a single call site that is
+    // extremely hot.
+    #[inline(always)]
+    pub fn shallow_resolve_changed(&mut self, typ: Ty<'tcx>) -> bool {
+        match typ.sty {
+            ty::Infer(ty::TyVar(v)) => {
+                use self::type_variable::TypeVariableValue;
+
+                // See the comment in `shallow_resolve()`.
+                match self.infcx.type_variables.borrow_mut().probe(v) {
+                    TypeVariableValue::Known { value: t } => self.fold_ty(t) != typ,
+                    TypeVariableValue::Unknown { .. } => false,
+                }
+            }
+
+            ty::Infer(ty::IntVar(v)) => {
+                match self.infcx.int_unification_table.borrow_mut().probe_value(v) {
+                    Some(v) => v.to_type(self.infcx.tcx) != typ,
+                    None => false,
+                }
+            }
+
+            ty::Infer(ty::FloatVar(v)) => {
+                match self.infcx.float_unification_table.borrow_mut().probe_value(v) {
+                    Some(v) => v.to_type(self.infcx.tcx) != typ,
+                    None => false,
+                }
+            }
+
+            _ => false,
+        }
+    }
+
 }
 
 impl<'a, 'tcx> TypeFolder<'tcx> for ShallowResolver<'a, 'tcx> {
@@ -1605,7 +1637,7 @@
     }
 
     fn fold_ty(&mut self, ty: Ty<'tcx>) -> Ty<'tcx> {
-        self.inlined_shallow_resolve(ty)
+        self.shallow_resolve(ty)
     }
 
     fn fold_const(&mut self, ct: &'tcx ty::Const<'tcx>) -> &'tcx ty::Const<'tcx> {
diff --git a/src/librustc/traits/fulfill.rs b/src/librustc/traits/fulfill.rs
index 5eaaeca..805727b 100644
--- a/src/librustc/traits/fulfill.rs
+++ b/src/librustc/traits/fulfill.rs
@@ -264,9 +264,7 @@
             // This `for` loop was once a call to `all()`, but this lower-level
             // form was a perf win. See #64545 for details.
             for &ty in &pending_obligation.stalled_on {
-                // Use the force-inlined variant of shallow_resolve() because this code is hot.
-                let resolved = ShallowResolver::new(self.selcx.infcx()).inlined_shallow_resolve(ty);
-                if resolved != ty {
+                if ShallowResolver::new(self.selcx.infcx()).shallow_resolve_changed(ty) {
                     changed = true;
                     break;
                 }