[ubsan] Teach the pointer overflow check that "p - <unsigned> <= p" (PR33430)

The pointer overflow check gives false negatives when dealing with
expressions in which an unsigned value is subtracted from a pointer.
This is summarized in PR33430 [1]: ubsan permits the result of the
subtraction to be greater than "p", but it should not.

To fix the issue, we should track whether or not the pointer expression
is a subtraction. If it is, and the indices are unsigned, we know to
expect "p - <unsigned> <= p".

I've tested this by running check-{llvm,clang} with a stage2 ubsan-enabled build. I've also added some tests to compiler-rt, which I'll upload in a separate patch.

[1] https://bugs.llvm.org/show_bug.cgi?id=33430
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index ef6101e..135c4e4 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -2957,7 +2957,8 @@
                                           SourceLocation loc,
                                     const llvm::Twine &name = "arrayidx") {
   if (inbounds) {
-    return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, loc, name);
+    return CGF.EmitCheckedInBoundsGEP(
+        ptr, indices, signedIndices, CodeGenFunction::IsSubtraction, loc, name);
   } else {
     return CGF.Builder.CreateGEP(ptr, indices, name);
   }
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index d916f3e..3dfa0b9 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -1836,7 +1836,7 @@
   llvm::Value *input;
 
   int amount = (isInc ? 1 : -1);
-  bool signedIndex = !isInc;
+  bool isSubtraction = !isInc;
 
   if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
     type = atomicTy->getValueType();
@@ -1926,8 +1926,9 @@
       if (CGF.getLangOpts().isSignedOverflowDefined())
         value = Builder.CreateGEP(value, numElts, "vla.inc");
       else
-        value = CGF.EmitCheckedInBoundsGEP(value, numElts, signedIndex,
-                                           E->getExprLoc(), "vla.inc");
+        value = CGF.EmitCheckedInBoundsGEP(
+            value, numElts, /*SignedIndices=*/false, isSubtraction,
+            E->getExprLoc(), "vla.inc");
 
     // Arithmetic on function pointers (!) is just +-1.
     } else if (type->isFunctionType()) {
@@ -1937,8 +1938,9 @@
       if (CGF.getLangOpts().isSignedOverflowDefined())
         value = Builder.CreateGEP(value, amt, "incdec.funcptr");
       else
-        value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex,
-                                           E->getExprLoc(), "incdec.funcptr");
+        value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+                                           isSubtraction, E->getExprLoc(),
+                                           "incdec.funcptr");
       value = Builder.CreateBitCast(value, input->getType());
 
     // For everything else, we can just do a simple increment.
@@ -1947,8 +1949,9 @@
       if (CGF.getLangOpts().isSignedOverflowDefined())
         value = Builder.CreateGEP(value, amt, "incdec.ptr");
       else
-        value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex,
-                                           E->getExprLoc(), "incdec.ptr");
+        value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+                                           isSubtraction, E->getExprLoc(),
+                                           "incdec.ptr");
     }
 
   // Vector increment/decrement.
@@ -2029,7 +2032,8 @@
     if (CGF.getLangOpts().isSignedOverflowDefined())
       value = Builder.CreateGEP(value, sizeValue, "incdec.objptr");
     else
-      value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, signedIndex,
+      value = CGF.EmitCheckedInBoundsGEP(value, sizeValue,
+                                         /*SignedIndices=*/false, isSubtraction,
                                          E->getExprLoc(), "incdec.objptr");
     value = Builder.CreateBitCast(value, input->getType());
   }
@@ -2648,7 +2652,6 @@
   }
 
   bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType();
-  bool mayHaveNegativeGEPIndex = isSigned || isSubtraction;
 
   unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth();
   auto &DL = CGF.CGM.getDataLayout();
@@ -2700,7 +2703,7 @@
     } else {
       index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index");
       pointer =
-          CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex,
+          CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
                                      op.E->getExprLoc(), "add.ptr");
     }
     return pointer;
@@ -2718,7 +2721,7 @@
   if (CGF.getLangOpts().isSignedOverflowDefined())
     return CGF.Builder.CreateGEP(pointer, index, "add.ptr");
 
-  return CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex,
+  return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
                                     op.E->getExprLoc(), "add.ptr");
 }
 
@@ -2792,7 +2795,7 @@
 Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
   if (op.LHS->getType()->isPointerTy() ||
       op.RHS->getType()->isPointerTy())
-    return emitPointerArithmetic(CGF, op, /*subtraction*/ false);
+    return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction);
 
   if (op.Ty->isSignedIntegerOrEnumerationType()) {
     switch (CGF.getLangOpts().getSignedOverflowBehavior()) {
@@ -2863,7 +2866,7 @@
   // If the RHS is not a pointer, then we have normal pointer
   // arithmetic.
   if (!op.RHS->getType()->isPointerTy())
-    return emitPointerArithmetic(CGF, op, /*subtraction*/ true);
+    return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction);
 
   // Otherwise, this is a pointer subtraction.
 
@@ -3831,6 +3834,7 @@
 Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
                                                ArrayRef<Value *> IdxList,
                                                bool SignedIndices,
+                                               bool IsSubtraction,
                                                SourceLocation Loc,
                                                const Twine &Name) {
   Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name);
@@ -3936,15 +3940,19 @@
   // pointer matches the sign of the total offset.
   llvm::Value *ValidGEP;
   auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows);
-  auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
   if (SignedIndices) {
+    auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
     auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero);
     llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
     ValidGEP = Builder.CreateAnd(
         Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid),
         NoOffsetOverflow);
-  } else {
+  } else if (!SignedIndices && !IsSubtraction) {
+    auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
     ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow);
+  } else {
+    auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr);
+    ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow);
   }
 
   llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)};
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 7b627a5..64c9a65 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -3469,12 +3469,19 @@
   /// nonnull, if \p LHS is marked _Nonnull.
   void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc);
 
+  /// An enumeration which makes it easier to specify whether or not an
+  /// operation is a subtraction.
+  enum { NotSubtraction = false, IsSubtraction = true };
+
   /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to
   /// detect undefined behavior when the pointer overflow sanitizer is enabled.
   /// \p SignedIndices indicates whether any of the GEP indices are signed.
+  /// \p IsSubtraction indicates whether the expression used to form the GEP
+  /// is a subtraction.
   llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr,
                                       ArrayRef<llvm::Value *> IdxList,
                                       bool SignedIndices,
+                                      bool IsSubtraction,
                                       SourceLocation Loc,
                                       const Twine &Name = "");
 
diff --git a/test/CodeGen/ubsan-pointer-overflow.m b/test/CodeGen/ubsan-pointer-overflow.m
index da62235..8b9633d 100644
--- a/test/CodeGen/ubsan-pointer-overflow.m
+++ b/test/CodeGen/ubsan-pointer-overflow.m
@@ -10,16 +10,20 @@
   ++p;
 
   // CHECK: ptrtoint i8* {{.*}} to i64, !nosanitize
-  // CHECK-NEXT: add i64 {{.*}}, -1, !nosanitize
-  // CHECK: select i1 false{{.*}}, !nosanitize
+  // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 {{.*}}, -1, !nosanitize
+  // CHECK: [[NEGVALID:%.*]] = icmp ule i64 [[COMPGEP]], {{.*}}, !nosanitize
+  // CHECK-NOT: select
+  // CHECK: br i1 [[NEGVALID]]{{.*}}, !nosanitize
   // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
   --p;
 
+  // CHECK: icmp uge i64
   // CHECK-NOT: select
   // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
   p++;
 
-  // CHECK: select
+  // CHECK: icmp ule i64
+  // CHECK-NOT: select
   // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
   p--;
 }
@@ -64,7 +68,8 @@
 
   // CHECK: [[OFFSET:%.*]] = sub i64 0, {{.*}}
   // CHECK-NEXT: getelementptr inbounds {{.*}} [[OFFSET]]
-  // CHECK: select
+  // CHECK: icmp ule i64
+  // CHECK-NOT: select
   // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
   p - i;
 }