[ScopBuilder] Fix bug 38358 by preserving correct order of ScopStmts.

ScopBuilder::buildEqivClassBlockStmts creates ScopStmts for instruction
groups in basic block and inserts these ScopStmts into Scop::StmtMap,
however, as described in llvm.org/PR38358, comment #5, StmtScops are
inserted into vector ScopStmt[BB] in wrong order.  As a result,
ScopBuilder::buildSchedule creates wrong order sequence node.

Looking closer to code, it's clear there is no equivalent classes with
interleaving isOrderedInstruction(memory access) instructions after
joinOrderedInstructions.  Afterwards, ScopStmts need to be created and
inserted in the original order of memory access instructions, however,
at the moment ScopStmts are inserted in the order of leader instructions
which are probably not memory access instructions.

The fix is simple with a standalone loop scanning
isOrderedInstruction(memory access) instructions in basic block and
inserting elements into LeaderToInstList one by one.  The patch also
removes double reversing operations which are now unnecessary.

New test preserve-equiv-class-order-in-basic_block.ll is also added.

Differential Revision: https://reviews.llvm.org/D68941

git-svn-id: https://llvm.org/svn/llvm-project/polly/trunk@375192 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/ScopBuilder.cpp b/lib/Analysis/ScopBuilder.cpp
index 554e9a8..db7bea1 100644
--- a/lib/Analysis/ScopBuilder.cpp
+++ b/lib/Analysis/ScopBuilder.cpp
@@ -2119,14 +2119,27 @@
   joinOrderedPHIs(UnionFind, ModeledInsts);
 
   // The list of instructions for statement (statement represented by the leader
-  // instruction). The order of statements instructions is reversed such that
-  // the epilogue is first. This makes it easier to ensure that the epilogue is
-  // the last statement.
+  // instruction).
   MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
 
+  // The order of statements must be preserved w.r.t. their ordered
+  // instructions. Without this explicit scan, we would also use non-ordered
+  // instructions (whose order is arbitrary) to determine statement order.
+  for (Instruction &Inst : *BB) {
+    if (!isOrderedInstruction(&Inst))
+      continue;
+
+    auto LeaderIt = UnionFind.findLeader(&Inst);
+    if (LeaderIt == UnionFind.member_end())
+      continue;
+
+    // Insert element for the leader instruction.
+    (void)LeaderToInstList[*LeaderIt];
+  }
+
   // Collect the instructions of all leaders. UnionFind's member iterator
   // unfortunately are not in any specific order.
-  for (Instruction &Inst : reverse(*BB)) {
+  for (Instruction &Inst : *BB) {
     auto LeaderIt = UnionFind.findLeader(&Inst);
     if (LeaderIt == UnionFind.member_end())
       continue;
@@ -2140,13 +2153,12 @@
   // Finally build the statements.
   int Count = 0;
   long BBIdx = scop->getNextStmtIdx();
-  for (auto &Instructions : reverse(LeaderToInstList)) {
+  for (auto &Instructions : LeaderToInstList) {
     std::vector<Instruction *> &InstList = Instructions.second;
 
     // If there is no main instruction, make the first statement the main.
     bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
 
-    std::reverse(InstList.begin(), InstList.end());
     std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
     scop->addScopStmt(BB, Name, L, std::move(InstList));
     Count += 1;
diff --git a/test/ScopInfo/preserve-equiv-class-order-in-basic_block.ll b/test/ScopInfo/preserve-equiv-class-order-in-basic_block.ll
new file mode 100644
index 0000000..2e7ad13
--- /dev/null
+++ b/test/ScopInfo/preserve-equiv-class-order-in-basic_block.ll
@@ -0,0 +1,94 @@
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+@b = dso_local local_unnamed_addr global i32 1, align 4
+@e = dso_local local_unnamed_addr global i32 3, align 4
+@a = common dso_local local_unnamed_addr global [56 x i32] zeroinitializer, align 16
+@f = common dso_local local_unnamed_addr global i16 0, align 2
+@d = common dso_local local_unnamed_addr global i8 0, align 1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @func() {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry.split
+  %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [56 x i32], [56 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 56
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  %1 = load i32, i32* @e, align 4, !tbaa !0
+  store i32 2, i32* @e, align 4, !tbaa !0
+  %2 = trunc i32 %1 to i16
+  %conv = and i16 %2, 1
+  %tobool = icmp eq i16 %conv, 0
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.end, %for.inc11
+  %storemerge20 = phi i32 [ 2, %for.end ], [ %dec12, %for.inc11 ]
+  %3 = load i8, i8* @d, align 1
+  %cmp6 = icmp eq i8 %3, 8
+  %or.cond = or i1 %tobool, %cmp6
+  br i1 %or.cond, label %for.inc11, label %for.inc11.loopexit
+
+for.inc11.loopexit:                               ; preds = %for.body3
+  store i32 0, i32* @b, align 4, !tbaa !0
+  store i8 8, i8* @d, align 1, !tbaa !4
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.inc11.loopexit, %for.body3
+  %dec12 = add nsw i32 %storemerge20, -1
+  %cmp2 = icmp sgt i32 %storemerge20, -18
+  br i1 %cmp2, label %for.body3, label %for.end13
+
+for.end13:                                        ; preds = %for.inc11
+  store i16 %conv, i16* @f, align 2, !tbaa !5
+  store i32 -19, i32* @e, align 4, !tbaa !0
+  %4 = load i32, i32* @b, align 4, !tbaa !0
+  ret i32 %4
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!2, !2, i64 0}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"short", !2, i64 0}
+
+; CHECK:    	Stmt_for_end_a
+; CHECK-NEXT:            Domain :=
+; CHECK-NEXT:                { Stmt_for_end_a[] };
+; CHECK-NEXT:            Schedule :=
+; CHECK-NEXT:                { Stmt_for_end_a[] -> [1, 0] };
+; CHECK-NEXT:            ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                { Stmt_for_end_a[] -> MemRef_e[0] };
+; CHECK-NEXT:            MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                { Stmt_for_end_a[] -> MemRef_tobool[] };
+; CHECK-NEXT:            MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                { Stmt_for_end_a[] -> MemRef_conv[] };
+; CHECK-NEXT:            Instructions {
+; CHECK-NEXT:                  %1 = load i32, i32* @e, align 4, !tbaa !0
+; CHECK-NEXT:                  %2 = trunc i32 %1 to i16
+; CHECK-NEXT:                  %conv = and i16 %2, 1
+; CHECK-NEXT:                  %tobool = icmp eq i16 %conv, 0
+; CHECK-NEXT:            }
+; CHECK-NEXT:    	Stmt_for_end
+; CHECK-NEXT:            Domain :=
+; CHECK-NEXT:                { Stmt_for_end[] };
+; CHECK-NEXT:            Schedule :=
+; CHECK-NEXT:                { Stmt_for_end[] -> [2, 0] };
+; CHECK-NEXT:            MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                { Stmt_for_end[] -> MemRef_e[0] };
+; CHECK-NEXT:            Instructions {
+; CHECK-NEXT:                  store i32 2, i32* @e, align 4, !tbaa !0
+; CHECK-NEXT:            }