[RISCV] optimize addition with a pair of (addi imm)

For an addition with an immediate in the range [-4096, -2049] or
[2048, 4094], a pair of addi instructions can be generated instead of the
usual lui-addi-add sequence.
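
As an illustration (values taken from the new add32_accept test below), an
immediate of 2999 is split as imm0 = imm - imm/2 = 1500 and
imm1 = imm/2 = 1499, so the addition is lowered to

    addi a0, a0, 1500
    addi a0, a0, 1499

instead of first materializing 2999 into a scratch register with lui+addi
and then adding it.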

Reviewed By: MaskRay, luismarques

Differential Revision: https://reviews.llvm.org/D82262
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index e7584e4..a0ae0508 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -75,6 +75,30 @@
   EVT VT = Node->getValueType(0);
 
   switch (Opcode) {
+  case ISD::ADD: {
+    // Optimize (add r, imm) to (addi (addi r, imm0) imm1) if applicable. The
+    // immediate must be in specific ranges and have a single use.
+    if (auto *ConstOp = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+      if (!ConstOp->hasOneUse())
+        break;
+      // The imm must be in the range [-4096,-2049] or [2048,4094].
+      int64_t Imm = ConstOp->getSExtValue();
+      if (!(-4096 <= Imm && Imm <= -2049) && !(2048 <= Imm && Imm <= 4094))
+        break;
+      // Split the imm into imm0 + imm1, where imm0 = imm - imm/2 and imm1 = imm/2.
+      SDLoc DL(Node);
+      EVT VT = Node->getValueType(0);
+      const SDValue ImmOp0 = CurDAG->getTargetConstant(Imm - Imm / 2, DL, VT);
+      const SDValue ImmOp1 = CurDAG->getTargetConstant(Imm / 2, DL, VT);
+      auto *NodeAddi0 = CurDAG->getMachineNode(RISCV::ADDI, DL, VT,
+                                               Node->getOperand(0), ImmOp0);
+      auto *NodeAddi1 = CurDAG->getMachineNode(RISCV::ADDI, DL, VT,
+                                               SDValue(NodeAddi0, 0), ImmOp1);
+      ReplaceNode(Node, NodeAddi1);
+      return;
+    }
+    break;
+  }
   case ISD::Constant: {
     auto ConstNode = cast<ConstantSDNode>(Node);
     if (VT == XLenVT && ConstNode->isNullValue()) {
diff --git a/llvm/test/CodeGen/RISCV/add-imm.ll b/llvm/test/CodeGen/RISCV/add-imm.ll
new file mode 100644
index 0000000..2db13eb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/add-imm.ll
@@ -0,0 +1,209 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+; These tests check how the immediate in an addition is materialized.
+
+define i32 @add_positive_low_bound_reject(i32 %a) nounwind {
+; RV32I-LABEL: add_positive_low_bound_reject:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, 2047
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_positive_low_bound_reject:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 2047
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 2047
+  ret i32 %1
+}
+
+define i32 @add_positive_low_bound_accept(i32 %a) nounwind {
+; RV32I-LABEL: add_positive_low_bound_accept:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, 1024
+; RV32I-NEXT:    addi a0, a0, 1024
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_positive_low_bound_accept:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 1024
+; RV64I-NEXT:    addi a0, a0, 1024
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 2048
+  ret i32 %1
+}
+
+define i32 @add_positive_high_bound_accept(i32 %a) nounwind {
+; RV32I-LABEL: add_positive_high_bound_accept:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, 2047
+; RV32I-NEXT:    addi a0, a0, 2047
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_positive_high_bound_accept:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 2047
+; RV64I-NEXT:    addi a0, a0, 2047
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 4094
+  ret i32 %1
+}
+
+define i32 @add_positive_high_bound_reject(i32 %a) nounwind {
+; RV32I-LABEL: add_positive_high_bound_reject:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 1
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_positive_high_bound_reject:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 4095
+  ret i32 %1
+}
+
+define i32 @add_negative_high_bound_reject(i32 %a) nounwind {
+; RV32I-LABEL: add_negative_high_bound_reject:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, -2048
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_negative_high_bound_reject:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, -2048
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, -2048
+  ret i32 %1
+}
+
+define i32 @add_negative_high_bound_accept(i32 %a) nounwind {
+; RV32I-LABEL: add_negative_high_bound_accept:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, -1025
+; RV32I-NEXT:    addi a0, a0, -1024
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_negative_high_bound_accept:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, -1025
+; RV64I-NEXT:    addi a0, a0, -1024
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, -2049
+  ret i32 %1
+}
+
+define i32 @add_negative_low_bound_accept(i32 %a) nounwind {
+; RV32I-LABEL: add_negative_low_bound_accept:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, -2048
+; RV32I-NEXT:    addi a0, a0, -2048
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_negative_low_bound_accept:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, -2048
+; RV64I-NEXT:    addi a0, a0, -2048
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, -4096
+  ret i32 %1
+}
+
+define i32 @add_negative_low_bound_reject(i32 %a) nounwind {
+; RV32I-LABEL: add_negative_low_bound_reject:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 1048575
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_negative_low_bound_reject:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1048575
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, -4097
+  ret i32 %1
+}
+
+define i32 @add32_accept(i32 %a) nounwind {
+; RV32I-LABEL: add32_accept:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, 1500
+; RV32I-NEXT:    addi a0, a0, 1499
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add32_accept:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 1500
+; RV64I-NEXT:    addi a0, a0, 1499
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 2999
+  ret i32 %1
+}
+
+define i64 @add64_accept(i64 %a) nounwind {
+; RV32I-LABEL: add64_accept:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a2, a0, 1500
+; RV32I-NEXT:    addi a2, a2, 1499
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add64_accept:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 1500
+; RV64I-NEXT:    addi a0, a0, 1499
+; RV64I-NEXT:    ret
+  %1 = add i64 %a, 2999
+  ret i64 %1
+}
+
+@ga = global i32 0, align 4
+@gb = global i32 0, align 4
+define void @add32_reject() nounwind {
+; RV32I-LABEL: add32_reject:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a0, %hi(ga)
+; RV32I-NEXT:    lw a1, %lo(ga)(a0)
+; RV32I-NEXT:    lui a2, %hi(gb)
+; RV32I-NEXT:    lw a3, %lo(gb)(a2)
+; RV32I-NEXT:    lui a4, 1
+; RV32I-NEXT:    addi a4, a4, -1096
+; RV32I-NEXT:    add a1, a1, a4
+; RV32I-NEXT:    add a3, a3, a4
+; RV32I-NEXT:    sw a1, %lo(ga)(a0)
+; RV32I-NEXT:    sw a3, %lo(gb)(a2)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add32_reject:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, %hi(ga)
+; RV64I-NEXT:    lw a1, %lo(ga)(a0)
+; RV64I-NEXT:    lui a2, %hi(gb)
+; RV64I-NEXT:    lw a3, %lo(gb)(a2)
+; RV64I-NEXT:    lui a4, 1
+; RV64I-NEXT:    addiw a4, a4, -1096
+; RV64I-NEXT:    add a1, a1, a4
+; RV64I-NEXT:    add a3, a3, a4
+; RV64I-NEXT:    sw a1, %lo(ga)(a0)
+; RV64I-NEXT:    sw a3, %lo(gb)(a2)
+; RV64I-NEXT:    ret
+  %1 = load i32, i32* @ga, align 4
+  %2 = load i32, i32* @gb, align 4
+  %3 = add i32 %1, 3000
+  %4 = add i32 %2, 3000
+  store i32 %3, i32* @ga, align 4
+  store i32 %4, i32* @gb, align 4
+  ret void
+}